1# module pyparsing.py 2# 3# Copyright (c) 2003-2008 Paul T. McGuire 4# 5# Permission is hereby granted, free of charge, to any person obtaining 6# a copy of this software and associated documentation files (the 7# "Software"), to deal in the Software without restriction, including 8# without limitation the rights to use, copy, modify, merge, publish, 9# distribute, sublicense, and/or sell copies of the Software, and to 10# permit persons to whom the Software is furnished to do so, subject to 11# the following conditions: 12# 13# The above copyright notice and this permission notice shall be 14# included in all copies or substantial portions of the Software. 15# 16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23# 24 25__doc__ = """ 26pyparsing module - Classes and methods to define and execute parsing grammars 27 28The pyparsing module is an alternative approach to creating and executing simple grammars, 29vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you 30don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 31provides a library of classes that you use to construct the grammar directly in Python. 32 33Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!"):: 34 35 from pyparsing import Word, alphas 36 37 # define grammar of a greeting 38 greet = Word( alphas ) + "," + Word( alphas ) + "!" 39 40 hello = "Hello, World!" 41 print hello, "->", greet.parseString( hello ) 42 43The program outputs the following:: 44 45 Hello, World! -> ['Hello', ',', 'World', '!'] 46 47The Python representation of the grammar is quite readable, owing to the self-explanatory 48class names, and the use of '+', '|' and '^' operators. 49 50The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an 51object with named attributes. 52 53The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 54 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.) 55 - quoted strings 56 - embedded comments 57""" 58 59__version__ = "1.5.0" 60__versionTime__ = "28 May 2008 10:05" 61__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 62 63import copy 64import re 65import string 66import sys 67import warnings 68import xml.sax.saxutils 69from weakref import ref as wkref 70 71import sre_constants 72 73__all__ = [ 74 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 75 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 76 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 77 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 78 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 79 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', 80 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 81 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 82 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 83 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums', 84 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 85 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 86 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 87 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 88 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 89 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 90 'indentedBlock', 91] 92 93 94""" 95Detect if we are running version 3.X and make appropriate changes 96Robert A. Clark 97""" 98_MAX_INT = sys.maxsize 99 100 101def _str2dict(strg): 102 return {c: 0 for c in strg} 103 # ~ return set( [c for c in strg] ) 104 105 106class _Constants: 107 pass 108 109 110alphas = string.ascii_lowercase + string.ascii_uppercase 111nums = string.digits 112hexnums = nums + "ABCDEFabcdef" 113alphanums = alphas + nums 114_bslash = "\\" 115printables = "".join([c for c in string.printable if c not in string.whitespace]) 116 117 118class ParseBaseException(Exception): 119 """base exception class for all parsing runtime exceptions""" 120 __slots__ = ("loc", "msg", "pstr", "parserElement") 121 # Performance tuning: we construct a *lot* of these, so keep this 122 # constructor as small and fast as possible 123 124 def __init__(self, pstr, loc=0, msg=None, elem=None): 125 self.loc = loc 126 if msg is None: 127 self.msg = pstr 128 self.pstr = "" 129 else: 130 self.msg = msg 131 self.pstr = pstr 132 self.parserElement = elem 133 134 def __getattr__(self, aname): 135 """supported attributes by name are: 136 - lineno - returns the line number of the exception text 137 - col - returns the column number of the exception text 138 - line - returns the line containing the exception text 139 """ 140 if(aname == "lineno"): 141 return lineno(self.loc, self.pstr) 142 elif(aname in ("col", "column")): 143 return col(self.loc, self.pstr) 144 elif(aname == "line"): 145 return line(self.loc, self.pstr) 146 else: 147 raise AttributeError(aname) 148 149 def __str__(self): 150 return "%s (at char %d), (line:%d, col:%d)" % (self.msg, self.loc, self.lineno, self.column) 151 152 def __repr__(self): 153 return str(self) 154 155 def markInputline(self, markerString=">!<"): 156 """Extracts the exception line from the input string, and marks 157 the location of the exception with a special symbol. 158 """ 159 line_str = self.line 160 line_column = self.column - 1 161 if markerString: 162 line_str = "".join([line_str[:line_column], 163 markerString, line_str[line_column:]]) 164 return line_str.strip() 165 166 167class ParseException(ParseBaseException): 168 """exception thrown when parse expressions don't match class; 169 supported attributes by name are: 170 - lineno - returns the line number of the exception text 171 - col - returns the column number of the exception text 172 - line - returns the line containing the exception text 173 """ 174 175 176class ParseFatalException(ParseBaseException): 177 """user-throwable exception thrown when inconsistent parse content 178 is found; stops all parsing immediately""" 179 180 181class ParseSyntaxException(ParseFatalException): 182 """just like ParseFatalException, but thrown internally when an 183 ErrorStop indicates that parsing is to stop immediately because 184 an unbacktrackable syntax error has been found""" 185 186 def __init__(self, pe): 187 super().__init__(pe.pstr, pe.loc, pe.msg, pe.parserElement) 188 189 190class RecursiveGrammarException(Exception): 191 """exception thrown by validate() if the grammar could be improperly recursive""" 192 193 def __init__(self, parseElementList): 194 self.parseElementTrace = parseElementList 195 196 def __str__(self): 197 return "RecursiveGrammarException: %s" % self.parseElementTrace 198 199 200class _ParseResultsWithOffset: 201 def __init__(self, p1, p2): 202 self.tup = (p1, p2) 203 204 def __getitem__(self, i): 205 return self.tup[i] 206 207 def __repr__(self): 208 return repr(self.tup) 209 210 211class ParseResults: 212 """Structured parse results, to provide multiple means of access to the parsed data: 213 - as a list (len(results)) 214 - by list index (results[0], results[1], etc.) 215 - by attribute (results.<resultsName>) 216 """ 217 __slots__ = ("__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__") 218 219 def __new__(cls, toklist, name=None, asList=True, modal=True): 220 if isinstance(toklist, cls): 221 return toklist 222 retobj = object.__new__(cls) 223 retobj.__doinit = True 224 return retobj 225 226 # Performance tuning: we construct a *lot* of these, so keep this 227 # constructor as small and fast as possible 228 def __init__(self, toklist, name=None, asList=True, modal=True): 229 if self.__doinit: 230 self.__doinit = False 231 self.__name = None 232 self.__parent = None 233 self.__accumNames = {} 234 if isinstance(toklist, list): 235 self.__toklist = toklist[:] 236 else: 237 self.__toklist = [toklist] 238 self.__tokdict = dict() 239 240 # this line is related to debugging the asXML bug 241 # ~ asList = False 242 243 if name: 244 if not modal: 245 self.__accumNames[name] = 0 246 if isinstance(name, int): 247 name = str(name) 248 self.__name = name 249 if toklist not in (None, '', []): 250 if isinstance(toklist, str): 251 toklist = [toklist] 252 if asList: 253 if isinstance(toklist, ParseResults): 254 self[name] = _ParseResultsWithOffset(toklist.copy(), -1) 255 else: 256 self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), -1) 257 self[name].__name = name 258 else: 259 try: 260 self[name] = toklist[0] 261 except (KeyError, TypeError): 262 self[name] = toklist 263 264 def __getitem__(self, i): 265 if isinstance(i, (int, slice)): 266 return self.__toklist[i] 267 else: 268 if i not in self.__accumNames: 269 return self.__tokdict[i][-1][0] 270 else: 271 return ParseResults([v[0] for v in self.__tokdict[i]]) 272 273 def __setitem__(self, k, v): 274 if isinstance(v, _ParseResultsWithOffset): 275 self.__tokdict[k] = self.__tokdict.get(k, list()) + [v] 276 sub = v[0] 277 elif isinstance(k, int): 278 self.__toklist[k] = v 279 sub = v 280 else: 281 self.__tokdict[k] = self.__tokdict.get(k, list()) + [_ParseResultsWithOffset(v, 0)] 282 sub = v 283 if isinstance(sub, ParseResults): 284 sub.__parent = wkref(self) 285 286 def __delitem__(self, i): 287 if isinstance(i, (int, slice)): 288 mylen = len(self.__toklist) 289 del self.__toklist[i] 290 291 # convert int to slice 292 if isinstance(i, int): 293 if i < 0: 294 i += mylen 295 i = slice(i, i+1) 296 # get removed indices 297 removed = list(range(*i.indices(mylen))) 298 removed.reverse() 299 # fixup indices in token dictionary 300 for name in self.__tokdict: 301 occurrences = self.__tokdict[name] 302 for j in removed: 303 for k, (value, position) in enumerate(occurrences): 304 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 305 else: 306 del self.__tokdict[i] 307 308 def __contains__(self, k): 309 return k in self.__tokdict 310 311 def __len__(self): 312 return len(self.__toklist) 313 314 def __bool__(self): 315 return len(self.__toklist) > 0 316 317 __nonzero__ = __bool__ 318 319 def __iter__(self): 320 return iter(self.__toklist) 321 322 def __reversed__(self): 323 return iter(reversed(self.__toklist)) 324 325 def keys(self): 326 """Returns all named result keys.""" 327 return self.__tokdict.keys() 328 329 def pop(self, index=-1): 330 """Removes and returns item at specified index (default=last). 331 Will work with either numeric indices or dict-key indicies.""" 332 ret = self[index] 333 del self[index] 334 return ret 335 336 def get(self, key, defaultValue=None): 337 """Returns named result matching the given key, or if there is no 338 such name, then returns the given defaultValue or None if no 339 defaultValue is specified.""" 340 if key in self: 341 return self[key] 342 else: 343 return defaultValue 344 345 def items(self): 346 """Returns all named result keys and values as a list of tuples.""" 347 return [(k, self[k]) for k in self.__tokdict] 348 349 def values(self): 350 """Returns all named result values.""" 351 return [v[-1][0] for v in self.__tokdict.values()] 352 353 def __getattr__(self, name): 354 if name not in self.__slots__: 355 if name in self.__tokdict: 356 if name not in self.__accumNames: 357 return self.__tokdict[name][-1][0] 358 else: 359 return ParseResults([v[0] for v in self.__tokdict[name]]) 360 else: 361 return "" 362 return None 363 364 def __add__(self, other): 365 ret = self.copy() 366 ret += other 367 return ret 368 369 def __iadd__(self, other): 370 if other.__tokdict: 371 offset = len(self.__toklist) 372 addoffset = (lambda a: (a < 0 and offset) or (a+offset)) 373 otheritems = other.__tokdict.items() 374 otherdictitems = [(k, _ParseResultsWithOffset(v[0], addoffset(v[1]))) 375 for (k, vlist) in otheritems for v in vlist] 376 for k, v in otherdictitems: 377 self[k] = v 378 if isinstance(v[0], ParseResults): 379 v[0].__parent = wkref(self) 380 self.__toklist += other.__toklist 381 self.__accumNames.update(other.__accumNames) 382 del other 383 return self 384 385 def __repr__(self): 386 return f"({repr(self.__toklist)}, {repr(self.__tokdict)})" 387 388 def __str__(self): 389 out = "[" 390 sep = "" 391 for i in self.__toklist: 392 if isinstance(i, ParseResults): 393 out += sep + str(i) 394 else: 395 out += sep + repr(i) 396 sep = ", " 397 out += "]" 398 return out 399 400 def _asStringList(self, sep=''): 401 out = [] 402 for item in self.__toklist: 403 if out and sep: 404 out.append(sep) 405 if isinstance(item, ParseResults): 406 out += item._asStringList() 407 else: 408 out.append(str(item)) 409 return out 410 411 def asList(self): 412 """Returns the parse results as a nested list of matching tokens, all converted to strings.""" 413 out = [] 414 for res in self.__toklist: 415 if isinstance(res, ParseResults): 416 out.append(res.asList()) 417 else: 418 out.append(res) 419 return out 420 421 def asDict(self): 422 """Returns the named parse results as dictionary.""" 423 return dict(self.items()) 424 425 def copy(self): 426 """Returns a new copy of a ParseResults object.""" 427 ret = ParseResults(self.__toklist) 428 ret.__tokdict = self.__tokdict.copy() 429 ret.__parent = self.__parent 430 ret.__accumNames.update(self.__accumNames) 431 ret.__name = self.__name 432 return ret 433 434 def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True): 435 """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.""" 436 nl = "\n" 437 out = [] 438 namedItems = {v[1]: k for (k, vlist) in self.__tokdict.items() for v in vlist} 439 nextLevelIndent = indent + " " 440 441 # collapse out indents if formatting is not desired 442 if not formatted: 443 indent = "" 444 nextLevelIndent = "" 445 nl = "" 446 447 selfTag = None 448 if doctag is not None: 449 selfTag = doctag 450 else: 451 if self.__name: 452 selfTag = self.__name 453 454 if not selfTag: 455 if namedItemsOnly: 456 return "" 457 else: 458 selfTag = "ITEM" 459 460 out += [nl, indent, "<", selfTag, ">"] 461 462 worklist = self.__toklist 463 for i, res in enumerate(worklist): 464 if isinstance(res, ParseResults): 465 if i in namedItems: 466 out += [res.asXML( 467 namedItems[i], 468 namedItemsOnly and doctag is None, 469 nextLevelIndent, 470 formatted)] 471 else: 472 out += [res.asXML( 473 None, 474 namedItemsOnly and doctag is None, 475 nextLevelIndent, 476 formatted)] 477 else: 478 # individual token, see if there is a name for it 479 resTag = None 480 if i in namedItems: 481 resTag = namedItems[i] 482 if not resTag: 483 if namedItemsOnly: 484 continue 485 else: 486 resTag = "ITEM" 487 xmlBodyText = xml.sax.saxutils.escape(str(res)) 488 out += [nl, nextLevelIndent, "<", resTag, ">", xmlBodyText, "</", resTag, ">"] 489 490 out += [nl, indent, "</", selfTag, ">"] 491 return "".join(out) 492 493 def __lookup(self, sub): 494 for k, vlist in self.__tokdict.items(): 495 for v, _loc in vlist: 496 if sub is v: 497 return k 498 return None 499 500 def getName(self): 501 """Returns the results name for this token expression.""" 502 if self.__name: 503 return self.__name 504 elif self.__parent: 505 par = self.__parent() 506 if par: 507 return par.__lookup(self) 508 else: 509 return None 510 elif (len(self) == 1 511 and len(self.__tokdict) == 1 512 and self.__tokdict.values()[0][0][1] in (0, -1)): 513 return self.__tokdict.keys()[0] 514 else: 515 return None 516 517 def dump(self, indent='', depth=0): 518 """Diagnostic method for listing out the contents of a ParseResults. 519 Accepts an optional indent argument so that this string can be embedded 520 in a nested display of other data.""" 521 out = [] 522 out.append(indent+str(self.asList())) 523 keys = sorted(self.items()) 524 for k, v in keys: 525 if out: 526 out.append('\n') 527 out.append("{}{}- {}: ".format(indent, (' '*depth), k)) 528 if isinstance(v, ParseResults): 529 if v.keys(): 530 out.append(v.dump(indent, depth+1)) 531 else: 532 out.append(str(v)) 533 else: 534 out.append(str(v)) 535 return "".join(out) 536 537 # add support for pickle protocol 538 def __getstate__(self): 539 return (self.__toklist, ( 540 self.__tokdict.copy(), 541 self.__parent is not None and self.__parent() or None, 542 self.__accumNames, 543 self.__name)) 544 545 def __setstate__(self, state): 546 self.__toklist = state[0] 547 self.__tokdict, par, inAccumNames, self.__name = state[1] 548 self.__accumNames = {} 549 self.__accumNames.update(inAccumNames) 550 if par is not None: 551 self.__parent = wkref(par) 552 else: 553 self.__parent = None 554 555 556def col(loc, strg): 557 """Returns current column within a string, counting newlines as line separators. 558 The first column is number 1. 559 560 Note: the default parsing behavior is to expand tabs in the input string 561 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information 562 on parsing strings containing <TAB>s, and suggested methods to maintain a 563 consistent view of the parsed string, the parse location, and line and column 564 positions within the parsed string. 565 """ 566 return (loc < len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc) 567 568 569def lineno(loc, strg): 570 """Returns current line number within a string, counting newlines as line separators. 571 The first line is number 1. 572 573 Note: the default parsing behavior is to expand tabs in the input string 574 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information 575 on parsing strings containing <TAB>s, and suggested methods to maintain a 576 consistent view of the parsed string, the parse location, and line and column 577 positions within the parsed string. 578 """ 579 return strg.count("\n", 0, loc) + 1 580 581 582def line(loc, strg): 583 """Returns the line of text containing loc within a string, counting newlines as line separators. 584 """ 585 lastCR = strg.rfind("\n", 0, loc) 586 nextCR = strg.find("\n", loc) 587 if nextCR > 0: 588 return strg[lastCR+1:nextCR] 589 else: 590 return strg[lastCR+1:] 591 592 593def _defaultStartDebugAction(instring, loc, expr): 594 print("Match " + str(expr) + " at loc " + str(loc) + "(%d,%d)" % (lineno(loc, instring), col(loc, instring))) 595 596 597def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks): 598 print("Matched " + str(expr) + " -> " + str(toks.asList())) 599 600 601def _defaultExceptionDebugAction(instring, loc, expr, exc): 602 print("Exception raised:" + str(exc)) 603 604 605def nullDebugAction(*args): 606 """'Do-nothing' debug action, to suppress debugging output during parsing.""" 607 608 609class ParserElement: 610 """Abstract base level parser element class.""" 611 DEFAULT_WHITE_CHARS = " \n\t\r" 612 613 def setDefaultWhitespaceChars(chars): 614 """Overrides the default whitespace chars 615 """ 616 ParserElement.DEFAULT_WHITE_CHARS = chars 617 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) 618 619 def __init__(self, savelist=False): 620 self.parseAction = list() 621 self.failAction = None 622 # ~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall 623 self.strRepr = None 624 self.resultsName = None 625 self.saveAsList = savelist 626 self.skipWhitespace = True 627 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 628 self.copyDefaultWhiteChars = True 629 self.mayReturnEmpty = False # used when checking for left-recursion 630 self.keepTabs = False 631 self.ignoreExprs = list() 632 self.debug = False 633 self.streamlined = False 634 self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index 635 self.errmsg = "" 636 self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) 637 self.debugActions = (None, None, None) # custom debug actions 638 self.re = None 639 self.callPreparse = True # used to avoid redundant calls to preParse 640 self.callDuringTry = False 641 642 def copy(self): 643 """Make a copy of this ParserElement. Useful for defining different parse actions 644 for the same parsing pattern, using copies of the original parse element.""" 645 cpy = copy.copy(self) 646 cpy.parseAction = self.parseAction[:] 647 cpy.ignoreExprs = self.ignoreExprs[:] 648 if self.copyDefaultWhiteChars: 649 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 650 return cpy 651 652 def setName(self, name): 653 """Define name for this expression, for use in debugging.""" 654 self.name = name 655 self.errmsg = "Expected " + self.name 656 if hasattr(self, "exception"): 657 self.exception.msg = self.errmsg 658 return self 659 660 def setResultsName(self, name, listAllMatches=False): 661 """Define name for referencing matching tokens as a nested attribute 662 of the returned parse results. 663 NOTE: this returns a *copy* of the original ParserElement object; 664 this is so that the client can define a basic element, such as an 665 integer, and reference it in multiple places with different names. 666 """ 667 newself = self.copy() 668 newself.resultsName = name 669 newself.modalResults = not listAllMatches 670 return newself 671 672 def setBreak(self, breakFlag=True): 673 """Method to invoke the Python pdb debugger when this element is 674 about to be parsed. Set breakFlag to True to enable, False to 675 disable. 676 """ 677 if breakFlag: 678 _parseMethod = self._parse 679 680 def breaker(instring, loc, doActions=True, callPreParse=True): 681 import pdb 682 pdb.set_trace() 683 _parseMethod(instring, loc, doActions, callPreParse) 684 breaker._originalParseMethod = _parseMethod 685 self._parse = breaker 686 else: 687 if hasattr(self._parse, "_originalParseMethod"): 688 self._parse = self._parse._originalParseMethod 689 return self 690 691 def _normalizeParseActionArgs(f): 692 """Internal method used to decorate parse actions that take fewer than 3 arguments, 693 so that all parse actions can be called as f(s,l,t).""" 694 STAR_ARGS = 4 695 696 try: 697 restore = None 698 if isinstance(f, type): 699 restore = f 700 f = f.__init__ 701 codeObj = f.code 702 if codeObj.co_flags & STAR_ARGS: 703 return f 704 numargs = codeObj.co_argcount 705 if hasattr(f, "__self__"): 706 numargs -= 1 707 if restore: 708 f = restore 709 except AttributeError: 710 try: 711 call_im_func_code = f.__code__ 712 713 # not a function, must be a callable object, get info from the 714 # im_func binding of its bound __call__ method 715 if call_im_func_code.co_flags & STAR_ARGS: 716 return f 717 numargs = call_im_func_code.co_argcount 718 if hasattr(f.__call__, "__self__"): 719 numargs -= 0 720 except AttributeError: 721 call_func_code = f.__call__.__code__ 722 # not a bound method, get info directly from __call__ method 723 if call_func_code.co_flags & STAR_ARGS: 724 return f 725 numargs = call_func_code.co_argcount 726 if hasattr(f.__call__, "__self__"): 727 numargs -= 1 728 729 # ~ print ("adding function %s with %d args" % (f.func_name,numargs)) 730 if numargs == 3: 731 return f 732 else: 733 if numargs > 3: 734 def tmp(s, l, t): 735 return f(f.__call__.__self__, s, l, t) 736 elif numargs == 2: 737 def tmp(s, l, t): 738 return f(l, t) 739 elif numargs == 1: 740 def tmp(s, l, t): 741 return f(t) 742 else: # ~ numargs == 0: 743 def tmp(s, l, t): 744 return f() 745 try: 746 tmp.__name__ = f.__name__ 747 except (AttributeError, TypeError): 748 # no need for special handling if attribute doesnt exist 749 pass 750 try: 751 tmp.__doc__ = f.__doc__ 752 except (AttributeError, TypeError): 753 # no need for special handling if attribute doesnt exist 754 pass 755 try: 756 tmp.__dict__.update(f.__dict__) 757 except (AttributeError, TypeError): 758 # no need for special handling if attribute doesnt exist 759 pass 760 return tmp 761 _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs) 762 763 def setParseAction(self, *fns, **kwargs): 764 """Define action to perform when successfully matching parse element definition. 765 Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks), 766 fn(loc,toks), fn(toks), or just fn(), where: 767 - s = the original string being parsed (see note below) 768 - loc = the location of the matching substring 769 - toks = a list of the matched tokens, packaged as a ParseResults object 770 If the functions in fns modify the tokens, they can return them as the return 771 value from fn, and the modified list of tokens will replace the original. 772 Otherwise, fn does not need to return any value. 773 774 Note: the default parsing behavior is to expand tabs in the input string 775 before starting the parsing process. See L{I{parseString}<parseString>} for more information 776 on parsing strings containing <TAB>s, and suggested methods to maintain a 777 consistent view of the parsed string, the parse location, and line and column 778 positions within the parsed string. 779 """ 780 self.parseAction = list(map(self._normalizeParseActionArgs, list(fns))) 781 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 782 return self 783 784 def addParseAction(self, *fns, **kwargs): 785 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.""" 786 self.parseAction += list(map(self._normalizeParseActionArgs, list(fns))) 787 self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 788 return self 789 790 def setFailAction(self, fn): 791 """Define action to perform if parsing fails at this expression. 792 Fail acton fn is a callable function that takes the arguments 793 fn(s,loc,expr,err) where: 794 - s = string being parsed 795 - loc = location where expression match was attempted and failed 796 - expr = the parse expression that failed 797 - err = the exception thrown 798 The function returns no value. It may throw ParseFatalException 799 if it is desired to stop parsing immediately.""" 800 self.failAction = fn 801 return self 802 803 def _skipIgnorables(self, instring, loc): 804 exprsFound = True 805 while exprsFound: 806 exprsFound = False 807 for e in self.ignoreExprs: 808 try: 809 while True: 810 loc, dummy = e._parse(instring, loc) 811 exprsFound = True 812 except ParseException: 813 pass 814 return loc 815 816 def preParse(self, instring, loc): 817 if self.ignoreExprs: 818 loc = self._skipIgnorables(instring, loc) 819 820 if self.skipWhitespace: 821 wt = self.whiteChars 822 instrlen = len(instring) 823 while loc < instrlen and instring[loc] in wt: 824 loc += 1 825 826 return loc 827 828 def parseImpl(self, instring, loc, doActions=True): 829 return loc, [] 830 831 def postParse(self, instring, loc, tokenlist): 832 return tokenlist 833 834 # ~ @profile 835 def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True): 836 debugging = (self.debug) # and doActions ) 837 838 if debugging or self.failAction: 839 # ~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )) 840 if self.debugActions[0]: 841 self.debugActions[0](instring, loc, self) 842 if callPreParse and self.callPreparse: 843 preloc = self.preParse(instring, loc) 844 else: 845 preloc = loc 846 tokensStart = loc 847 try: 848 try: 849 loc, tokens = self.parseImpl(instring, preloc, doActions) 850 except IndexError: 851 raise ParseException(instring, len(instring), self.errmsg, self) 852 except ParseBaseException as err: 853 # ~ print ("Exception raised:", err) 854 if self.debugActions[2]: 855 self.debugActions[2](instring, tokensStart, self, err) 856 if self.failAction: 857 self.failAction(instring, tokensStart, self, err) 858 raise 859 else: 860 if callPreParse and self.callPreparse: 861 preloc = self.preParse(instring, loc) 862 else: 863 preloc = loc 864 tokensStart = loc 865 if self.mayIndexError or loc >= len(instring): 866 try: 867 loc, tokens = self.parseImpl(instring, preloc, doActions) 868 except IndexError: 869 raise ParseException(instring, len(instring), self.errmsg, self) 870 else: 871 loc, tokens = self.parseImpl(instring, preloc, doActions) 872 873 tokens = self.postParse(instring, loc, tokens) 874 875 retTokens = ParseResults(tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults) 876 if self.parseAction and (doActions or self.callDuringTry): 877 if debugging: 878 try: 879 for fn in self.parseAction: 880 tokens = fn(instring, tokensStart, retTokens) 881 if tokens is not None: 882 retTokens = ParseResults( 883 tokens, 884 self.resultsName, 885 asList=self.saveAsList and isinstance(tokens, (ParseResults, list)), 886 modal=self.modalResults) 887 except ParseBaseException as err: 888 # ~ print "Exception raised in user parse action:", err 889 if self.debugActions[2]: 890 self.debugActions[2](instring, tokensStart, self, err) 891 raise 892 else: 893 for fn in self.parseAction: 894 tokens = fn(instring, tokensStart, retTokens) 895 if tokens is not None: 896 retTokens = ParseResults( 897 tokens, 898 self.resultsName, 899 asList=self.saveAsList and isinstance(tokens, (ParseResults, list)), 900 modal=self.modalResults) 901 902 if debugging: 903 # ~ print ("Matched",self,"->",retTokens.asList()) 904 if self.debugActions[1]: 905 self.debugActions[1](instring, tokensStart, loc, self, retTokens) 906 907 return loc, retTokens 908 909 def tryParse(self, instring, loc): 910 try: 911 return self._parse(instring, loc, doActions=False)[0] 912 except ParseFatalException: 913 raise ParseException(instring, loc, self.errmsg, self) 914 915 # this method gets repeatedly called during backtracking with the same arguments - 916 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression 917 def _parseCache(self, instring, loc, doActions=True, callPreParse=True): 918 lookup = (self, instring, loc, callPreParse, doActions) 919 if lookup in ParserElement._exprArgCache: 920 value = ParserElement._exprArgCache[lookup] 921 if isinstance(value, Exception): 922 raise value 923 return value 924 else: 925 try: 926 value = self._parseNoCache(instring, loc, doActions, callPreParse) 927 ParserElement._exprArgCache[lookup] = (value[0], value[1].copy()) 928 return value 929 except ParseBaseException as pe: 930 ParserElement._exprArgCache[lookup] = pe 931 raise 932 933 _parse = _parseNoCache 934 935 # argument cache for optimizing repeated calls when backtracking through recursive expressions 936 _exprArgCache = {} 937 938 def resetCache(): 939 ParserElement._exprArgCache.clear() 940 resetCache = staticmethod(resetCache) 941 942 _packratEnabled = False 943 944 def enablePackrat(): 945 """Enables "packrat" parsing, which adds memoizing to the parsing logic. 946 Repeated parse attempts at the same string location (which happens 947 often in many complex grammars) can immediately return a cached value, 948 instead of re-executing parsing/validating code. Memoizing is done of 949 both valid results and parsing exceptions. 950 951 This speedup may break existing programs that use parse actions that 952 have side-effects. For this reason, packrat parsing is disabled when 953 you first import pyparsing. To activate the packrat feature, your 954 program must call the class method ParserElement.enablePackrat(). If 955 your program uses psyco to "compile as you go", you must call 956 enablePackrat before calling psyco.full(). If you do not do this, 957 Python will crash. For best results, call enablePackrat() immediately 958 after importing pyparsing. 959 """ 960 if not ParserElement._packratEnabled: 961 ParserElement._packratEnabled = True 962 ParserElement._parse = ParserElement._parseCache 963 enablePackrat = staticmethod(enablePackrat) 964 965 def parseString(self, instring, parseAll=False): 966 """Execute the parse expression with the given string. 967 This is the main interface to the client code, once the complete 968 expression has been built. 969 970 If you want the grammar to require that the entire input string be 971 successfully parsed, then set parseAll to True (equivalent to ending 972 the grammar with StringEnd()). 973 974 Note: parseString implicitly calls expandtabs() on the input string, 975 in order to report proper column numbers in parse actions. 976 If the input string contains tabs and 977 the grammar uses parse actions that use the loc argument to index into the 978 string being parsed, you can ensure you have a consistent view of the input 979 string by: 980 - calling parseWithTabs on your grammar before calling parseString 981 (see L{I{parseWithTabs}<parseWithTabs>}) 982 - define your parse action using the full (s,loc,toks) signature, and 983 reference the input string using the parse action's s argument 984 - explictly expand the tabs in your input string before calling 985 parseString 986 """ 987 ParserElement.resetCache() 988 if not self.streamlined: 989 self.streamline() 990 # ~ self.saveAsList = True 991 for e in self.ignoreExprs: 992 e.streamline() 993 if not self.keepTabs: 994 instring = instring.expandtabs() 995 loc, tokens = self._parse(instring, 0) 996 if parseAll: 997 StringEnd()._parse(instring, loc) 998 return tokens 999 1000 def scanString(self, instring, maxMatches=_MAX_INT): 1001 """Scan the input string for expression matches. Each match will return the 1002 matching tokens, start location, and end location. May be called with optional 1003 maxMatches argument, to clip scanning after 'n' matches are found. 1004 1005 Note that the start and end locations are reported relative to the string 1006 being parsed. See L{I{parseString}<parseString>} for more information on parsing 1007 strings with embedded tabs.""" 1008 if not self.streamlined: 1009 self.streamline() 1010 for e in self.ignoreExprs: 1011 e.streamline() 1012 1013 if not self.keepTabs: 1014 instring = str(instring).expandtabs() 1015 instrlen = len(instring) 1016 loc = 0 1017 preparseFn = self.preParse 1018 parseFn = self._parse 1019 ParserElement.resetCache() 1020 matches = 0 1021 while loc <= instrlen and matches < maxMatches: 1022 try: 1023 preloc = preparseFn(instring, loc) 1024 nextLoc, tokens = parseFn(instring, preloc, callPreParse=False) 1025 except ParseException: 1026 loc = preloc+1 1027 else: 1028 matches += 1 1029 yield tokens, preloc, nextLoc 1030 loc = nextLoc 1031 1032 def transformString(self, instring): 1033 """Extension to scanString, to modify matching text with modified tokens that may 1034 be returned from a parse action. To use transformString, define a grammar and 1035 attach a parse action to it that modifies the returned token list. 1036 Invoking transformString() on a target string will then scan for matches, 1037 and replace the matched text patterns according to the logic in the parse 1038 action. transformString() returns the resulting transformed string.""" 1039 out = [] 1040 lastE = 0 1041 # force preservation of <TAB>s, to minimize unwanted transformation of string, and to 1042 # keep string locs straight between transformString and scanString 1043 self.keepTabs = True 1044 for t, s, e in self.scanString(instring): 1045 out.append(instring[lastE:s]) 1046 if t: 1047 if isinstance(t, ParseResults): 1048 out += t.asList() 1049 elif isinstance(t, list): 1050 out += t 1051 else: 1052 out.append(t) 1053 lastE = e 1054 out.append(instring[lastE:]) 1055 return "".join(map(str, out)) 1056 1057 def searchString(self, instring, maxMatches=_MAX_INT): 1058 """Another extension to scanString, simplifying the access to the tokens found 1059 to match the given parse expression. May be called with optional 1060 maxMatches argument, to clip searching after 'n' matches are found. 1061 """ 1062 return ParseResults([t for t, s, e in self.scanString(instring, maxMatches)]) 1063 1064 def __add__(self, other): 1065 """Implementation of + operator - returns And""" 1066 if isinstance(other, str): 1067 other = Literal(other) 1068 if not isinstance(other, ParserElement): 1069 warnings.warn( 1070 "Cannot combine element of type %s with ParserElement" % type(other), 1071 SyntaxWarning, 1072 stacklevel=2) 1073 return None 1074 return And([self, other]) 1075 1076 def __radd__(self, other): 1077 """Implementation of + operator when left operand is not a ParserElement""" 1078 if isinstance(other, str): 1079 other = Literal(other) 1080 if not isinstance(other, ParserElement): 1081 warnings.warn( 1082 "Cannot combine element of type %s with ParserElement" % type(other), 1083 SyntaxWarning, 1084 stacklevel=2) 1085 return None 1086 return other + self 1087 1088 def __sub__(self, other): 1089 """Implementation of - operator, returns And with error stop""" 1090 if isinstance(other, str): 1091 other = Literal(other) 1092 if not isinstance(other, ParserElement): 1093 warnings.warn( 1094 "Cannot combine element of type %s with ParserElement" % type(other), 1095 SyntaxWarning, 1096 stacklevel=2) 1097 return None 1098 return And([self, And._ErrorStop(), other]) 1099 1100 def __rsub__(self, other): 1101 """Implementation of - operator when left operand is not a ParserElement""" 1102 if isinstance(other, str): 1103 other = Literal(other) 1104 if not isinstance(other, ParserElement): 1105 warnings.warn( 1106 "Cannot combine element of type %s with ParserElement" % type(other), 1107 SyntaxWarning, 1108 stacklevel=2) 1109 return None 1110 return other - self 1111 1112 def __mul__(self, other): 1113 if isinstance(other, int): 1114 minElements, optElements = other, 0 1115 elif isinstance(other, tuple): 1116 if len(other) == 0: 1117 other = (None, None) 1118 elif len(other) == 1: 1119 other = (other[0], None) 1120 if len(other) == 2: 1121 if other[0] is None: 1122 other = (0, other[1]) 1123 if isinstance(other[0], int) and other[1] is None: 1124 if other[0] == 0: 1125 return ZeroOrMore(self) 1126 if other[0] == 1: 1127 return OneOrMore(self) 1128 else: 1129 return self*other[0] + ZeroOrMore(self) 1130 elif isinstance(other[0], int) and isinstance(other[1], int): 1131 minElements, optElements = other 1132 optElements -= minElements 1133 else: 1134 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]), type(other[1])) 1135 else: 1136 raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects") 1137 else: 1138 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) 1139 1140 if minElements < 0: 1141 raise ValueError("cannot multiply ParserElement by negative value") 1142 if optElements < 0: 1143 raise ValueError("second tuple value must be greater or equal to first tuple value") 1144 if minElements == optElements == 0: 1145 raise ValueError("cannot multiply ParserElement by 0 or (0,0)") 1146 1147 if optElements: 1148 def makeOptionalList(n): 1149 if n > 1: 1150 return Optional(self + makeOptionalList(n-1)) 1151 else: 1152 return Optional(self) 1153 if minElements: 1154 if minElements == 1: 1155 ret = self + makeOptionalList(optElements) 1156 else: 1157 ret = And([self]*minElements) + makeOptionalList(optElements) 1158 else: 1159 ret = makeOptionalList(optElements) 1160 else: 1161 if minElements == 1: 1162 ret = self 1163 else: 1164 ret = And([self]*minElements) 1165 return ret 1166 1167 def __rmul__(self, other): 1168 return self.__mul__(other) 1169 1170 def __or__(self, other): 1171 """Implementation of | operator - returns MatchFirst""" 1172 if isinstance(other, str): 1173 other = Literal(other) 1174 if not isinstance(other, ParserElement): 1175 warnings.warn( 1176 "Cannot combine element of type %s with ParserElement" % type(other), 1177 SyntaxWarning, 1178 stacklevel=2) 1179 return None 1180 return MatchFirst([self, other]) 1181 1182 def __ror__(self, other): 1183 """Implementation of | operator when left operand is not a ParserElement""" 1184 if isinstance(other, str): 1185 other = Literal(other) 1186 if not isinstance(other, ParserElement): 1187 warnings.warn( 1188 "Cannot combine element of type %s with ParserElement" % type(other), 1189 SyntaxWarning, 1190 stacklevel=2) 1191 return None 1192 return other | self 1193 1194 def __xor__(self, other): 1195 """Implementation of ^ operator - returns Or""" 1196 if isinstance(other, str): 1197 other = Literal(other) 1198 if not isinstance(other, ParserElement): 1199 warnings.warn( 1200 "Cannot combine element of type %s with ParserElement" % type(other), 1201 SyntaxWarning, 1202 stacklevel=2) 1203 return None 1204 return Or([self, other]) 1205 1206 def __rxor__(self, other): 1207 """Implementation of ^ operator when left operand is not a ParserElement""" 1208 if isinstance(other, str): 1209 other = Literal(other) 1210 if not isinstance(other, ParserElement): 1211 warnings.warn( 1212 "Cannot combine element of type %s with ParserElement" % type(other), 1213 SyntaxWarning, 1214 stacklevel=2) 1215 return None 1216 return other ^ self 1217 1218 def __and__(self, other): 1219 """Implementation of & operator - returns Each""" 1220 if isinstance(other, str): 1221 other = Literal(other) 1222 if not isinstance(other, ParserElement): 1223 warnings.warn( 1224 "Cannot combine element of type %s with ParserElement" % type(other), 1225 SyntaxWarning, 1226 stacklevel=2) 1227 return None 1228 return Each([self, other]) 1229 1230 def __rand__(self, other): 1231 """Implementation of & operator when left operand is not a ParserElement""" 1232 if isinstance(other, str): 1233 other = Literal(other) 1234 if not isinstance(other, ParserElement): 1235 warnings.warn( 1236 "Cannot combine element of type %s with ParserElement" % type(other), 1237 SyntaxWarning, 1238 stacklevel=2) 1239 return None 1240 return other & self 1241 1242 def __invert__(self): 1243 """Implementation of ~ operator - returns NotAny""" 1244 return NotAny(self) 1245 1246 def __call__(self, name): 1247 """Shortcut for setResultsName, with listAllMatches=default:: 1248 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") 1249 could be written as:: 1250 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") 1251 """ 1252 return self.setResultsName(name) 1253 1254 def suppress(self): 1255 """Suppresses the output of this ParserElement; useful to keep punctuation from 1256 cluttering up returned output. 1257 """ 1258 return Suppress(self) 1259 1260 def leaveWhitespace(self): 1261 """Disables the skipping of whitespace before matching the characters in the 1262 ParserElement's defined pattern. This is normally only used internally by 1263 the pyparsing module, but may be needed in some whitespace-sensitive grammars. 1264 """ 1265 self.skipWhitespace = False 1266 return self 1267 1268 def setWhitespaceChars(self, chars): 1269 """Overrides the default whitespace chars 1270 """ 1271 self.skipWhitespace = True 1272 self.whiteChars = chars 1273 self.copyDefaultWhiteChars = False 1274 return self 1275 1276 def parseWithTabs(self): 1277 """Overrides default behavior to expand <TAB>s to spaces before parsing the input string. 1278 Must be called before parseString when the input grammar contains elements that 1279 match <TAB> characters.""" 1280 self.keepTabs = True 1281 return self 1282 1283 def ignore(self, other): 1284 """Define expression to be ignored (e.g., comments) while doing pattern 1285 matching; may be called repeatedly, to define multiple comment or other 1286 ignorable patterns. 1287 """ 1288 if isinstance(other, Suppress): 1289 if other not in self.ignoreExprs: 1290 self.ignoreExprs.append(other) 1291 else: 1292 self.ignoreExprs.append(Suppress(other)) 1293 return self 1294 1295 def setDebugActions(self, startAction, successAction, exceptionAction): 1296 """Enable display of debugging messages while doing pattern matching.""" 1297 self.debugActions = (startAction or _defaultStartDebugAction, 1298 successAction or _defaultSuccessDebugAction, 1299 exceptionAction or _defaultExceptionDebugAction) 1300 self.debug = True 1301 return self 1302 1303 def setDebug(self, flag=True): 1304 """Enable display of debugging messages while doing pattern matching. 1305 Set flag to True to enable, False to disable.""" 1306 if flag: 1307 self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction) 1308 else: 1309 self.debug = False 1310 return self 1311 1312 def __str__(self): 1313 return self.name 1314 1315 def __repr__(self): 1316 return str(self) 1317 1318 def streamline(self): 1319 self.streamlined = True 1320 self.strRepr = None 1321 return self 1322 1323 def checkRecursion(self, parseElementList): 1324 pass 1325 1326 def validate(self, validateTrace=None): 1327 """Check defined expressions for valid structure, check for infinite recursive definitions.""" 1328 self.checkRecursion([]) 1329 1330 def parseFile(self, file_or_filename): 1331 """Execute the parse expression on the given file or filename. 1332 If a filename is specified (instead of a file object), 1333 the entire file is opened, read, and closed before parsing. 1334 """ 1335 try: 1336 file_contents = file_or_filename.read() 1337 except AttributeError: 1338 f = open(file_or_filename, "rb") 1339 file_contents = f.read() 1340 f.close() 1341 return self.parseString(file_contents) 1342 1343 def getException(self): 1344 return ParseException("", 0, self.errmsg, self) 1345 1346 def __getattr__(self, aname): 1347 if aname == "myException": 1348 self.myException = ret = self.getException() 1349 return ret 1350 else: 1351 raise AttributeError("no such attribute " + aname) 1352 1353 def __eq__(self, other): 1354 if isinstance(other, str): 1355 try: 1356 (self + StringEnd()).parseString(str(other)) 1357 return True 1358 except ParseBaseException: 1359 return False 1360 else: 1361 return super() == other 1362 1363 def __hash__(self): 1364 return hash(id(self)) 1365 1366 def __req__(self, other): 1367 return self == other 1368 1369 1370class Token(ParserElement): 1371 """Abstract ParserElement subclass, for defining atomic matching patterns.""" 1372 1373 def __init__(self): 1374 super().__init__(savelist=False) 1375 1376 def setName(self, name): 1377 s = super().setName(name) 1378 self.errmsg = "Expected " + self.name 1379 return s 1380 1381 1382class Empty(Token): 1383 """An empty token, will always match.""" 1384 1385 def __init__(self): 1386 super().__init__() 1387 self.name = "Empty" 1388 self.mayReturnEmpty = True 1389 self.mayIndexError = False 1390 1391 1392class NoMatch(Token): 1393 """A token that will never match.""" 1394 1395 def __init__(self): 1396 super().__init__() 1397 self.name = "NoMatch" 1398 self.mayReturnEmpty = True 1399 self.mayIndexError = False 1400 self.errmsg = "Unmatchable token" 1401 1402 def parseImpl(self, instring, loc, doActions=True): 1403 exc = self.myException 1404 exc.loc = loc 1405 exc.pstr = instring 1406 raise exc 1407 1408 1409class Literal(Token): 1410 """Token to exactly match a specified string.""" 1411 1412 def __init__(self, matchString): 1413 super().__init__() 1414 self.match = matchString 1415 self.matchLen = len(matchString) 1416 try: 1417 self.firstMatchChar = matchString[0] 1418 except IndexError: 1419 warnings.warn( 1420 "null string passed to Literal; use Empty() instead", 1421 SyntaxWarning, 1422 stacklevel=2) 1423 self.__class__ = Empty 1424 self.name = '"%s"' % str(self.match) 1425 self.errmsg = "Expected " + self.name 1426 self.mayReturnEmpty = False 1427 self.mayIndexError = False 1428 1429 # Performance tuning: this routine gets called a *lot* 1430 # if this is a single character match string and the first character matches, 1431 # short-circuit as quickly as possible, and avoid calling startswith 1432 # ~ @profile 1433 def parseImpl(self, instring, loc, doActions=True): 1434 if (instring[loc] == self.firstMatchChar 1435 and (self.matchLen == 1 or instring.startswith(self.match, loc))): 1436 return loc+self.matchLen, self.match 1437 # ~ raise ParseException( instring, loc, self.errmsg ) 1438 exc = self.myException 1439 exc.loc = loc 1440 exc.pstr = instring 1441 raise exc 1442 1443 1444_L = Literal 1445 1446 1447class Keyword(Token): 1448 """Token to exactly match a specified string as a keyword, that is, it must be 1449 immediately followed by a non-keyword character. Compare with Literal:: 1450 Literal("if") will match the leading 'if' in 'ifAndOnlyIf'. 1451 Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)' 1452 Accepts two optional constructor arguments in addition to the keyword string: 1453 identChars is a string of characters that would be valid identifier characters, 1454 defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive 1455 matching, default is False. 1456 """ 1457 DEFAULT_KEYWORD_CHARS = alphanums+"_$" 1458 1459 def __init__(self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False): 1460 super().__init__() 1461 self.match = matchString 1462 self.matchLen = len(matchString) 1463 try: 1464 self.firstMatchChar = matchString[0] 1465 except IndexError: 1466 warnings.warn( 1467 "null string passed to Keyword; use Empty() instead", 1468 SyntaxWarning, 1469 stacklevel=2) 1470 self.name = '"%s"' % self.match 1471 self.errmsg = "Expected " + self.name 1472 self.mayReturnEmpty = False 1473 self.mayIndexError = False 1474 self.caseless = caseless 1475 if caseless: 1476 self.caselessmatch = matchString.upper() 1477 identChars = identChars.upper() 1478 self.identChars = _str2dict(identChars) 1479 1480 def parseImpl(self, instring, loc, doActions=True): 1481 if self.caseless: 1482 if ((instring[loc:loc+self.matchLen].upper() == self.caselessmatch) 1483 and (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) 1484 and (loc == 0 or instring[loc-1].upper() not in self.identChars)): 1485 return loc+self.matchLen, self.match 1486 else: 1487 if (instring[loc] == self.firstMatchChar 1488 and (self.matchLen == 1 or instring.startswith(self.match, loc)) 1489 and (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) 1490 and (loc == 0 or instring[loc-1] not in self.identChars)): 1491 return loc+self.matchLen, self.match 1492 # ~ raise ParseException( instring, loc, self.errmsg ) 1493 exc = self.myException 1494 exc.loc = loc 1495 exc.pstr = instring 1496 raise exc 1497 1498 def copy(self): 1499 c = super().copy() 1500 c.identChars = Keyword.DEFAULT_KEYWORD_CHARS 1501 return c 1502 1503 def setDefaultKeywordChars(chars): 1504 """Overrides the default Keyword chars 1505 """ 1506 Keyword.DEFAULT_KEYWORD_CHARS = chars 1507 setDefaultKeywordChars = staticmethod(setDefaultKeywordChars) 1508 1509 1510class CaselessLiteral(Literal): 1511 """Token to match a specified string, ignoring case of letters. 1512 Note: the matched results will always be in the case of the given 1513 match string, NOT the case of the input text. 1514 """ 1515 1516 def __init__(self, matchString): 1517 super().__init__(matchString.upper()) 1518 # Preserve the defining literal. 1519 self.returnString = matchString 1520 self.name = "'%s'" % self.returnString 1521 self.errmsg = "Expected " + self.name 1522 1523 def parseImpl(self, instring, loc, doActions=True): 1524 if instring[loc:loc+self.matchLen].upper() == self.match: 1525 return loc+self.matchLen, self.returnString 1526 # ~ raise ParseException( instring, loc, self.errmsg ) 1527 exc = self.myException 1528 exc.loc = loc 1529 exc.pstr = instring 1530 raise exc 1531 1532 1533class CaselessKeyword(Keyword): 1534 def __init__(self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS): 1535 super().__init__(matchString, identChars, caseless=True) 1536 1537 def parseImpl(self, instring, loc, doActions=True): 1538 if ((instring[loc:loc+self.matchLen].upper() == self.caselessmatch) 1539 and (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars)): 1540 return loc+self.matchLen, self.match 1541 # ~ raise ParseException( instring, loc, self.errmsg ) 1542 exc = self.myException 1543 exc.loc = loc 1544 exc.pstr = instring 1545 raise exc 1546 1547 1548class Word(Token): 1549 """Token for matching words composed of allowed character sets. 1550 Defined with string containing all allowed initial characters, 1551 an optional string containing allowed body characters (if omitted, 1552 defaults to the initial character set), and an optional minimum, 1553 maximum, and/or exact length. The default value for min is 1 (a 1554 minimum value < 1 is not valid); the default values for max and exact 1555 are 0, meaning no maximum or exact length restriction. 1556 """ 1557 1558 def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False): 1559 super().__init__() 1560 self.initCharsOrig = initChars 1561 self.initChars = _str2dict(initChars) 1562 if bodyChars: 1563 self.bodyCharsOrig = bodyChars 1564 self.bodyChars = _str2dict(bodyChars) 1565 else: 1566 self.bodyCharsOrig = initChars 1567 self.bodyChars = _str2dict(initChars) 1568 1569 self.maxSpecified = max > 0 1570 1571 if min < 1: 1572 raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") 1573 1574 self.minLen = min 1575 1576 if max > 0: 1577 self.maxLen = max 1578 else: 1579 self.maxLen = _MAX_INT 1580 1581 if exact > 0: 1582 self.maxLen = exact 1583 self.minLen = exact 1584 1585 self.name = str(self) 1586 self.errmsg = "Expected " + self.name 1587 self.mayIndexError = False 1588 self.asKeyword = asKeyword 1589 1590 if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min == 1 and max == 0 and exact == 0): 1591 if self.bodyCharsOrig == self.initCharsOrig: 1592 self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) 1593 elif len(self.bodyCharsOrig) == 1: 1594 self.reString = "{}[{}]*".format( 1595 re.escape(self.initCharsOrig), 1596 _escapeRegexRangeChars(self.bodyCharsOrig)) 1597 else: 1598 self.reString = "[{}][{}]*".format( 1599 _escapeRegexRangeChars(self.initCharsOrig), 1600 _escapeRegexRangeChars(self.bodyCharsOrig)) 1601 if self.asKeyword: 1602 self.reString = r"\b"+self.reString+r"\b" 1603 try: 1604 self.re = re.compile(self.reString) 1605 except Exception: 1606 self.re = None 1607 1608 def parseImpl(self, instring, loc, doActions=True): 1609 if self.re: 1610 result = self.re.match(instring, loc) 1611 if not result: 1612 exc = self.myException 1613 exc.loc = loc 1614 exc.pstr = instring 1615 raise exc 1616 1617 loc = result.end() 1618 return loc, result.group() 1619 1620 if not(instring[loc] in self.initChars): 1621 # ~ raise ParseException( instring, loc, self.errmsg ) 1622 exc = self.myException 1623 exc.loc = loc 1624 exc.pstr = instring 1625 raise exc 1626 start = loc 1627 loc += 1 1628 instrlen = len(instring) 1629 bodychars = self.bodyChars 1630 maxloc = start + self.maxLen 1631 maxloc = min(maxloc, instrlen) 1632 while loc < maxloc and instring[loc] in bodychars: 1633 loc += 1 1634 1635 throwException = False 1636 if loc - start < self.minLen: 1637 throwException = True 1638 if self.maxSpecified and loc < instrlen and instring[loc] in bodychars: 1639 throwException = True 1640 if self.asKeyword: 1641 if (start > 0 and instring[start-1] in bodychars) or (loc < instrlen and instring[loc] in bodychars): 1642 throwException = True 1643 1644 if throwException: 1645 # ~ raise ParseException( instring, loc, self.errmsg ) 1646 exc = self.myException 1647 exc.loc = loc 1648 exc.pstr = instring 1649 raise exc 1650 1651 return loc, instring[start:loc] 1652 1653 def __str__(self): 1654 try: 1655 return super().__str__() 1656 except Exception: 1657 pass 1658 1659 if self.strRepr is None: 1660 1661 def charsAsStr(s): 1662 if len(s) > 4: 1663 return s[:4]+"..." 1664 else: 1665 return s 1666 1667 if self.initCharsOrig != self.bodyCharsOrig: 1668 self.strRepr = f"W:({charsAsStr(self.initCharsOrig)},{charsAsStr(self.bodyCharsOrig)})" 1669 else: 1670 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig) 1671 1672 return self.strRepr 1673 1674 1675class Regex(Token): 1676 """Token for matching strings that match a given regular expression. 1677 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module. 1678 """ 1679 1680 def __init__(self, pattern, flags=0): 1681 """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.""" 1682 super().__init__() 1683 1684 if len(pattern) == 0: 1685 warnings.warn( 1686 "null string passed to Regex; use Empty() instead", 1687 SyntaxWarning, 1688 stacklevel=2) 1689 1690 self.pattern = pattern 1691 self.flags = flags 1692 1693 try: 1694 self.re = re.compile(self.pattern, self.flags) 1695 self.reString = self.pattern 1696 except sre_constants.error: 1697 warnings.warn( 1698 "invalid pattern (%s) passed to Regex" % pattern, 1699 SyntaxWarning, 1700 stacklevel=2) 1701 raise 1702 1703 self.name = str(self) 1704 self.errmsg = "Expected " + self.name 1705 self.mayIndexError = False 1706 self.mayReturnEmpty = True 1707 1708 def parseImpl(self, instring, loc, doActions=True): 1709 result = self.re.match(instring, loc) 1710 if not result: 1711 exc = self.myException 1712 exc.loc = loc 1713 exc.pstr = instring 1714 raise exc 1715 1716 loc = result.end() 1717 d = result.groupdict() 1718 ret = ParseResults(result.group()) 1719 if d: 1720 for k in d: 1721 ret[k] = d[k] 1722 return loc, ret 1723 1724 def __str__(self): 1725 try: 1726 return super().__str__() 1727 except Exception: 1728 pass 1729 1730 if self.strRepr is None: 1731 self.strRepr = "Re:(%s)" % repr(self.pattern) 1732 1733 return self.strRepr 1734 1735 1736class QuotedString(Token): 1737 """Token for matching strings that are delimited by quoting characters. 1738 """ 1739 1740 def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None): 1741 """ 1742 Defined with the following parameters: 1743 - quoteChar - string of one or more characters defining the quote delimiting string 1744 - escChar - character to escape quotes, typically backslash (default=None) 1745 - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None) 1746 - multiline - boolean indicating whether quotes can span multiple lines (default=False) 1747 - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True) 1748 - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar) 1749 """ 1750 super().__init__() 1751 1752 # remove white space from quote chars - wont work anyway 1753 quoteChar = quoteChar.strip() 1754 if len(quoteChar) == 0: 1755 warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2) 1756 raise SyntaxError() 1757 1758 if endQuoteChar is None: 1759 endQuoteChar = quoteChar 1760 else: 1761 endQuoteChar = endQuoteChar.strip() 1762 if len(endQuoteChar) == 0: 1763 warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2) 1764 raise SyntaxError() 1765 1766 self.quoteChar = quoteChar 1767 self.quoteCharLen = len(quoteChar) 1768 self.firstQuoteChar = quoteChar[0] 1769 self.endQuoteChar = endQuoteChar 1770 self.endQuoteCharLen = len(endQuoteChar) 1771 self.escChar = escChar 1772 self.escQuote = escQuote 1773 self.unquoteResults = unquoteResults 1774 1775 if multiline: 1776 self.flags = re.MULTILINE | re.DOTALL 1777 self.pattern = r'{}(?:[^{}{}]'.format( 1778 re.escape(self.quoteChar), 1779 _escapeRegexRangeChars(self.endQuoteChar[0]), 1780 (escChar is not None and _escapeRegexRangeChars(escChar) or '')) 1781 else: 1782 self.flags = 0 1783 self.pattern = r'{}(?:[^{}\n\r{}]'.format( 1784 re.escape(self.quoteChar), 1785 _escapeRegexRangeChars(self.endQuoteChar[0]), 1786 (escChar is not None and _escapeRegexRangeChars(escChar) or '')) 1787 if len(self.endQuoteChar) > 1: 1788 self.pattern += ( 1789 '|(?:' + ')|(?:'.join(["{}[^{}]".format( 1790 re.escape(self.endQuoteChar[:i]), 1791 _escapeRegexRangeChars(self.endQuoteChar[i]) 1792 ) for i in range(len(self.endQuoteChar)-1, 0, -1)]) + ')') 1793 if escQuote: 1794 self.pattern += (r'|(?:%s)' % re.escape(escQuote)) 1795 if escChar: 1796 self.pattern += (r'|(?:%s.)' % re.escape(escChar)) 1797 self.escCharReplacePattern = re.escape(self.escChar)+"(.)" 1798 self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) 1799 1800 try: 1801 self.re = re.compile(self.pattern, self.flags) 1802 self.reString = self.pattern 1803 except sre_constants.error: 1804 warnings.warn( 1805 "invalid pattern (%s) passed to Regex" % self.pattern, 1806 SyntaxWarning, 1807 stacklevel=2) 1808 raise 1809 1810 self.name = str(self) 1811 self.errmsg = "Expected " + self.name 1812 self.mayIndexError = False 1813 self.mayReturnEmpty = True 1814 1815 def parseImpl(self, instring, loc, doActions=True): 1816 result = instring[loc] == self.firstQuoteChar and self.re.match(instring, loc) or None 1817 if not result: 1818 exc = self.myException 1819 exc.loc = loc 1820 exc.pstr = instring 1821 raise exc 1822 1823 loc = result.end() 1824 ret = result.group() 1825 1826 if self.unquoteResults: 1827 1828 # strip off quotes 1829 ret = ret[self.quoteCharLen:-self.endQuoteCharLen] 1830 1831 if isinstance(ret, str): 1832 # replace escaped characters 1833 if self.escChar: 1834 ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret) 1835 1836 # replace escaped quotes 1837 if self.escQuote: 1838 ret = ret.replace(self.escQuote, self.endQuoteChar) 1839 1840 return loc, ret 1841 1842 def __str__(self): 1843 try: 1844 return super().__str__() 1845 except Exception: 1846 pass 1847 1848 if self.strRepr is None: 1849 self.strRepr = f"quoted string, starting with {self.quoteChar} ending with {self.endQuoteChar}" 1850 1851 return self.strRepr 1852 1853 1854class CharsNotIn(Token): 1855 """Token for matching words composed of characters *not* in a given set. 1856 Defined with string containing all disallowed characters, and an optional 1857 minimum, maximum, and/or exact length. The default value for min is 1 (a 1858 minimum value < 1 is not valid); the default values for max and exact 1859 are 0, meaning no maximum or exact length restriction. 1860 """ 1861 1862 def __init__(self, notChars, min=1, max=0, exact=0): 1863 super().__init__() 1864 self.skipWhitespace = False 1865 self.notChars = notChars 1866 1867 if min < 1: 1868 raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted") 1869 1870 self.minLen = min 1871 1872 if max > 0: 1873 self.maxLen = max 1874 else: 1875 self.maxLen = _MAX_INT 1876 1877 if exact > 0: 1878 self.maxLen = exact 1879 self.minLen = exact 1880 1881 self.name = str(self) 1882 self.errmsg = "Expected " + self.name 1883 self.mayReturnEmpty = (self.minLen == 0) 1884 self.mayIndexError = False 1885 1886 def parseImpl(self, instring, loc, doActions=True): 1887 if instring[loc] in self.notChars: 1888 # ~ raise ParseException( instring, loc, self.errmsg ) 1889 exc = self.myException 1890 exc.loc = loc 1891 exc.pstr = instring 1892 raise exc 1893 1894 start = loc 1895 loc += 1 1896 notchars = self.notChars 1897 maxlen = min(start+self.maxLen, len(instring)) 1898 while loc < maxlen and (instring[loc] not in notchars): 1899 loc += 1 1900 1901 if loc - start < self.minLen: 1902 # ~ raise ParseException( instring, loc, self.errmsg ) 1903 exc = self.myException 1904 exc.loc = loc 1905 exc.pstr = instring 1906 raise exc 1907 1908 return loc, instring[start:loc] 1909 1910 def __str__(self): 1911 try: 1912 return super().__str__() 1913 except Exception: 1914 pass 1915 1916 if self.strRepr is None: 1917 if len(self.notChars) > 4: 1918 self.strRepr = "!W:(%s...)" % self.notChars[:4] 1919 else: 1920 self.strRepr = "!W:(%s)" % self.notChars 1921 1922 return self.strRepr 1923 1924 1925class White(Token): 1926 """Special matching class for matching whitespace. Normally, whitespace is ignored 1927 by pyparsing grammars. This class is included when some whitespace structures 1928 are significant. Define with a string containing the whitespace characters to be 1929 matched; default is " \\t\\n". Also takes optional min, max, and exact arguments, 1930 as defined for the Word class.""" 1931 whiteStrs = { 1932 " ": "<SPC>", 1933 "\t": "<TAB>", 1934 "\n": "<LF>", 1935 "\r": "<CR>", 1936 "\f": "<FF>", 1937 } 1938 1939 def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): 1940 super().__init__() 1941 self.matchWhite = ws 1942 self.setWhitespaceChars("".join([c for c in self.whiteChars if c not in self.matchWhite])) 1943 self.name = ("".join([White.whiteStrs[c] for c in self.matchWhite])) 1944 self.mayReturnEmpty = True 1945 self.errmsg = "Expected " + self.name 1946 1947 self.minLen = min 1948 1949 if max > 0: 1950 self.maxLen = max 1951 else: 1952 self.maxLen = _MAX_INT 1953 1954 if exact > 0: 1955 self.maxLen = exact 1956 self.minLen = exact 1957 1958 def parseImpl(self, instring, loc, doActions=True): 1959 if not(instring[loc] in self.matchWhite): 1960 exc = self.myException 1961 exc.loc = loc 1962 exc.pstr = instring 1963 raise exc 1964 start = loc 1965 loc += 1 1966 maxloc = start + self.maxLen 1967 maxloc = min(maxloc, len(instring)) 1968 while loc < maxloc and instring[loc] in self.matchWhite: 1969 loc += 1 1970 1971 if loc - start < self.minLen: 1972 # ~ raise ParseException( instring, loc, self.errmsg ) 1973 exc = self.myException 1974 exc.loc = loc 1975 exc.pstr = instring 1976 raise exc 1977 1978 return loc, instring[start:loc] 1979 1980 1981class _PositionToken(Token): 1982 def __init__(self): 1983 super().__init__() 1984 self.name = self.__class__.__name__ 1985 self.mayReturnEmpty = True 1986 self.mayIndexError = False 1987 1988 1989class GoToColumn(_PositionToken): 1990 """Token to advance to a specific column of input text; useful for tabular report scraping.""" 1991 1992 def __init__(self, colno): 1993 super().__init__() 1994 self.col = colno 1995 1996 def preParse(self, instring, loc): 1997 if col(loc, instring) != self.col: 1998 instrlen = len(instring) 1999 if self.ignoreExprs: 2000 loc = self._skipIgnorables(instring, loc) 2001 while loc < instrlen and instring[loc].isspace() and col(loc, instring) != self.col: 2002 loc += 1 2003 return loc 2004 2005 def parseImpl(self, instring, loc, doActions=True): 2006 thiscol = col(loc, instring) 2007 if thiscol > self.col: 2008 raise ParseException(instring, loc, "Text not in expected column", self) 2009 newloc = loc + self.col - thiscol 2010 ret = instring[loc: newloc] 2011 return newloc, ret 2012 2013 2014class LineStart(_PositionToken): 2015 """Matches if current position is at the beginning of a line within the parse string""" 2016 2017 def __init__(self): 2018 super().__init__() 2019 self.setWhitespaceChars(" \t") 2020 self.errmsg = "Expected start of line" 2021 2022 def preParse(self, instring, loc): 2023 preloc = super().preParse(instring, loc) 2024 if instring[preloc] == "\n": 2025 loc += 1 2026 return loc 2027 2028 def parseImpl(self, instring, loc, doActions=True): 2029 if not(loc == 0 2030 or (loc == self.preParse(instring, 0)) 2031 or (instring[loc-1] == "\n")): # col(loc, instring) != 1: 2032 exc = self.myException 2033 exc.loc = loc 2034 exc.pstr = instring 2035 raise exc 2036 return loc, [] 2037 2038 2039class LineEnd(_PositionToken): 2040 """Matches if current position is at the end of a line within the parse string""" 2041 2042 def __init__(self): 2043 super().__init__() 2044 self.setWhitespaceChars(" \t") 2045 self.errmsg = "Expected end of line" 2046 2047 def parseImpl(self, instring, loc, doActions=True): 2048 if loc < len(instring): 2049 if instring[loc] == "\n": 2050 return loc+1, "\n" 2051 else: 2052 exc = self.myException 2053 exc.loc = loc 2054 exc.pstr = instring 2055 raise exc 2056 elif loc == len(instring): 2057 return loc+1, [] 2058 else: 2059 exc = self.myException 2060 exc.loc = loc 2061 exc.pstr = instring 2062 raise exc 2063 2064 2065class StringStart(_PositionToken): 2066 """Matches if current position is at the beginning of the parse string""" 2067 2068 def __init__(self): 2069 super().__init__() 2070 self.errmsg = "Expected start of text" 2071 2072 def parseImpl(self, instring, loc, doActions=True): 2073 if loc != 0: 2074 # see if entire string up to here is just whitespace and ignoreables 2075 if loc != self.preParse(instring, 0): 2076 exc = self.myException 2077 exc.loc = loc 2078 exc.pstr = instring 2079 raise exc 2080 return loc, [] 2081 2082 2083class StringEnd(_PositionToken): 2084 """Matches if current position is at the end of the parse string""" 2085 2086 def __init__(self): 2087 super().__init__() 2088 self.errmsg = "Expected end of text" 2089 2090 def parseImpl(self, instring, loc, doActions=True): 2091 if loc < len(instring): 2092 exc = self.myException 2093 exc.loc = loc 2094 exc.pstr = instring 2095 raise exc 2096 elif loc == len(instring): 2097 return loc+1, [] 2098 elif loc > len(instring): 2099 return loc, [] 2100 else: 2101 exc = self.myException 2102 exc.loc = loc 2103 exc.pstr = instring 2104 raise exc 2105 2106 2107class WordStart(_PositionToken): 2108 """Matches if the current position is at the beginning of a Word, and 2109 is not preceded by any character in a given set of wordChars 2110 (default=printables). To emulate the \b behavior of regular expressions, 2111 use WordStart(alphanums). WordStart will also match at the beginning of 2112 the string being parsed, or at the beginning of a line. 2113 """ 2114 2115 def __init__(self, wordChars=printables): 2116 super().__init__() 2117 self.wordChars = _str2dict(wordChars) 2118 self.errmsg = "Not at the start of a word" 2119 2120 def parseImpl(self, instring, loc, doActions=True): 2121 if loc != 0: 2122 if (instring[loc-1] in self.wordChars 2123 or instring[loc] not in self.wordChars): 2124 exc = self.myException 2125 exc.loc = loc 2126 exc.pstr = instring 2127 raise exc 2128 return loc, [] 2129 2130 2131class WordEnd(_PositionToken): 2132 """Matches if the current position is at the end of a Word, and 2133 is not followed by any character in a given set of wordChars 2134 (default=printables). To emulate the \b behavior of regular expressions, 2135 use WordEnd(alphanums). WordEnd will also match at the end of 2136 the string being parsed, or at the end of a line. 2137 """ 2138 2139 def __init__(self, wordChars=printables): 2140 super().__init__() 2141 self.wordChars = _str2dict(wordChars) 2142 self.skipWhitespace = False 2143 self.errmsg = "Not at the end of a word" 2144 2145 def parseImpl(self, instring, loc, doActions=True): 2146 instrlen = len(instring) 2147 if instrlen > 0 and loc < instrlen: 2148 if (instring[loc] in self.wordChars 2149 or instring[loc-1] not in self.wordChars): 2150 # ~ raise ParseException( instring, loc, "Expected end of word" ) 2151 exc = self.myException 2152 exc.loc = loc 2153 exc.pstr = instring 2154 raise exc 2155 return loc, [] 2156 2157 2158class ParseExpression(ParserElement): 2159 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.""" 2160 2161 def __init__(self, exprs, savelist=False): 2162 super().__init__(savelist) 2163 if isinstance(exprs, list): 2164 self.exprs = exprs 2165 elif isinstance(exprs, str): 2166 self.exprs = [Literal(exprs)] 2167 else: 2168 self.exprs = [exprs] 2169 self.callPreparse = False 2170 2171 def __getitem__(self, i): 2172 return self.exprs[i] 2173 2174 def append(self, other): 2175 self.exprs.append(other) 2176 self.strRepr = None 2177 return self 2178 2179 def leaveWhitespace(self): 2180 """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on 2181 all contained expressions.""" 2182 self.skipWhitespace = False 2183 self.exprs = [e.copy() for e in self.exprs] 2184 for e in self.exprs: 2185 e.leaveWhitespace() 2186 return self 2187 2188 def ignore(self, other): 2189 if isinstance(other, Suppress): 2190 if other not in self.ignoreExprs: 2191 super().ignore(other) 2192 for e in self.exprs: 2193 e.ignore(self.ignoreExprs[-1]) 2194 else: 2195 super().ignore(other) 2196 for e in self.exprs: 2197 e.ignore(self.ignoreExprs[-1]) 2198 return self 2199 2200 def __str__(self): 2201 try: 2202 return super().__str__() 2203 except Exception: 2204 pass 2205 2206 if self.strRepr is None: 2207 self.strRepr = f"{self.__class__.__name__}:({str(self.exprs)})" 2208 return self.strRepr 2209 2210 def streamline(self): 2211 super().streamline() 2212 2213 for e in self.exprs: 2214 e.streamline() 2215 2216 # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d ) 2217 # but only if there are no parse actions or resultsNames on the nested And's 2218 # (likewise for Or's and MatchFirst's) 2219 if len(self.exprs) == 2: 2220 other = self.exprs[0] 2221 if (isinstance(other, self.__class__) 2222 and not(other.parseAction) 2223 and other.resultsName is None 2224 and not other.debug): 2225 self.exprs = other.exprs[:] + [self.exprs[1]] 2226 self.strRepr = None 2227 self.mayReturnEmpty |= other.mayReturnEmpty 2228 self.mayIndexError |= other.mayIndexError 2229 2230 other = self.exprs[-1] 2231 if (isinstance(other, self.__class__) 2232 and not(other.parseAction) 2233 and other.resultsName is None 2234 and not other.debug): 2235 self.exprs = self.exprs[:-1] + other.exprs[:] 2236 self.strRepr = None 2237 self.mayReturnEmpty |= other.mayReturnEmpty 2238 self.mayIndexError |= other.mayIndexError 2239 2240 return self 2241 2242 def setResultsName(self, name, listAllMatches=False): 2243 ret = super().setResultsName(name, listAllMatches) 2244 return ret 2245 2246 def validate(self, validateTrace=None): 2247 if validateTrace is None: 2248 validateTrace = [] 2249 tmp = validateTrace[:]+[self] 2250 for e in self.exprs: 2251 e.validate(tmp) 2252 self.checkRecursion([]) 2253 2254 2255class And(ParseExpression): 2256 """Requires all given ParseExpressions to be found in the given order. 2257 Expressions may be separated by whitespace. 2258 May be constructed using the '+' operator. 2259 """ 2260 2261 class _ErrorStop(Empty): 2262 def __new__(cls, *args, **kwargs): 2263 return And._ErrorStop.instance 2264 _ErrorStop.instance = Empty() 2265 _ErrorStop.instance.leaveWhitespace() 2266 2267 def __init__(self, exprs, savelist=True): 2268 super().__init__(exprs, savelist) 2269 self.mayReturnEmpty = True 2270 for e in self.exprs: 2271 if not e.mayReturnEmpty: 2272 self.mayReturnEmpty = False 2273 break 2274 self.setWhitespaceChars(exprs[0].whiteChars) 2275 self.skipWhitespace = exprs[0].skipWhitespace 2276 self.callPreparse = True 2277 2278 def parseImpl(self, instring, loc, doActions=True): 2279 # pass False as last arg to _parse for first element, since we already 2280 # pre-parsed the string as part of our And pre-parsing 2281 loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False) 2282 errorStop = False 2283 for e in self.exprs[1:]: 2284 if e is And._ErrorStop.instance: 2285 errorStop = True 2286 continue 2287 if errorStop: 2288 try: 2289 loc, exprtokens = e._parse(instring, loc, doActions) 2290 except ParseBaseException as pe: 2291 raise ParseSyntaxException(pe) 2292 except IndexError: 2293 raise ParseSyntaxException(ParseException(instring, len(instring), self.errmsg, self)) 2294 else: 2295 loc, exprtokens = e._parse(instring, loc, doActions) 2296 if exprtokens or exprtokens.keys(): 2297 resultlist += exprtokens 2298 return loc, resultlist 2299 2300 def __iadd__(self, other): 2301 if isinstance(other, str): 2302 other = Literal(other) 2303 return self.append(other) # And( [ self, other ] ) 2304 2305 def checkRecursion(self, parseElementList): 2306 subRecCheckList = parseElementList[:] + [self] 2307 for e in self.exprs: 2308 e.checkRecursion(subRecCheckList) 2309 if not e.mayReturnEmpty: 2310 break 2311 2312 def __str__(self): 2313 if hasattr(self, "name"): 2314 return self.name 2315 2316 if self.strRepr is None: 2317 self.strRepr = "{" + " ".join([str(e) for e in self.exprs]) + "}" 2318 2319 return self.strRepr 2320 2321 2322class Or(ParseExpression): 2323 """Requires that at least one ParseExpression is found. 2324 If two expressions match, the expression that matches the longest string will be used. 2325 May be constructed using the '^' operator. 2326 """ 2327 2328 def __init__(self, exprs, savelist=False): 2329 super().__init__(exprs, savelist) 2330 self.mayReturnEmpty = False 2331 for e in self.exprs: 2332 if e.mayReturnEmpty: 2333 self.mayReturnEmpty = True 2334 break 2335 2336 def parseImpl(self, instring, loc, doActions=True): 2337 maxExcLoc = -1 2338 maxMatchLoc = -1 2339 maxException = None 2340 for e in self.exprs: 2341 try: 2342 loc2 = e.tryParse(instring, loc) 2343 except ParseException as err: 2344 if err.loc > maxExcLoc: 2345 maxException = err 2346 maxExcLoc = err.loc 2347 except IndexError: 2348 if len(instring) > maxExcLoc: 2349 maxException = ParseException(instring, len(instring), e.errmsg, self) 2350 maxExcLoc = len(instring) 2351 else: 2352 if loc2 > maxMatchLoc: 2353 maxMatchLoc = loc2 2354 maxMatchExp = e 2355 2356 if maxMatchLoc < 0: 2357 if maxException is not None: 2358 raise maxException 2359 else: 2360 raise ParseException(instring, loc, "no defined alternatives to match", self) 2361 2362 return maxMatchExp._parse(instring, loc, doActions) 2363 2364 def __ixor__(self, other): 2365 if isinstance(other, str): 2366 other = Literal(other) 2367 return self.append(other) # Or( [ self, other ] ) 2368 2369 def __str__(self): 2370 if hasattr(self, "name"): 2371 return self.name 2372 2373 if self.strRepr is None: 2374 self.strRepr = "{" + " ^ ".join([str(e) for e in self.exprs]) + "}" 2375 2376 return self.strRepr 2377 2378 def checkRecursion(self, parseElementList): 2379 subRecCheckList = parseElementList[:] + [self] 2380 for e in self.exprs: 2381 e.checkRecursion(subRecCheckList) 2382 2383 2384class MatchFirst(ParseExpression): 2385 """Requires that at least one ParseExpression is found. 2386 If two expressions match, the first one listed is the one that will match. 2387 May be constructed using the '|' operator. 2388 """ 2389 2390 def __init__(self, exprs, savelist=False): 2391 super().__init__(exprs, savelist) 2392 if exprs: 2393 self.mayReturnEmpty = False 2394 for e in self.exprs: 2395 if e.mayReturnEmpty: 2396 self.mayReturnEmpty = True 2397 break 2398 else: 2399 self.mayReturnEmpty = True 2400 2401 def parseImpl(self, instring, loc, doActions=True): 2402 maxExcLoc = -1 2403 maxException = None 2404 for e in self.exprs: 2405 try: 2406 ret = e._parse(instring, loc, doActions) 2407 return ret 2408 except ParseException as err: 2409 if err.loc > maxExcLoc: 2410 maxException = err 2411 maxExcLoc = err.loc 2412 except IndexError: 2413 if len(instring) > maxExcLoc: 2414 maxException = ParseException(instring, len(instring), e.errmsg, self) 2415 maxExcLoc = len(instring) 2416 2417 # only got here if no expression matched, raise exception for match that made it the furthest 2418 else: 2419 if maxException is not None: 2420 raise maxException 2421 else: 2422 raise ParseException(instring, loc, "no defined alternatives to match", self) 2423 2424 def __ior__(self, other): 2425 if isinstance(other, str): 2426 other = Literal(other) 2427 return self.append(other) # MatchFirst( [ self, other ] ) 2428 2429 def __str__(self): 2430 if hasattr(self, "name"): 2431 return self.name 2432 2433 if self.strRepr is None: 2434 self.strRepr = "{" + " | ".join([str(e) for e in self.exprs]) + "}" 2435 2436 return self.strRepr 2437 2438 def checkRecursion(self, parseElementList): 2439 subRecCheckList = parseElementList[:] + [self] 2440 for e in self.exprs: 2441 e.checkRecursion(subRecCheckList) 2442 2443 2444class Each(ParseExpression): 2445 """Requires all given ParseExpressions to be found, but in any order. 2446 Expressions may be separated by whitespace. 2447 May be constructed using the '&' operator. 2448 """ 2449 2450 def __init__(self, exprs, savelist=True): 2451 super().__init__(exprs, savelist) 2452 self.mayReturnEmpty = True 2453 for e in self.exprs: 2454 if not e.mayReturnEmpty: 2455 self.mayReturnEmpty = False 2456 break 2457 self.skipWhitespace = True 2458 self.initExprGroups = True 2459 2460 def parseImpl(self, instring, loc, doActions=True): 2461 if self.initExprGroups: 2462 self.optionals = [e.expr for e in self.exprs if isinstance(e, Optional)] 2463 self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)] 2464 self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)] 2465 self.required = [e for e in self.exprs if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))] 2466 self.required += self.multirequired 2467 self.initExprGroups = False 2468 tmpLoc = loc 2469 tmpReqd = self.required[:] 2470 tmpOpt = self.optionals[:] 2471 matchOrder = [] 2472 2473 keepMatching = True 2474 while keepMatching: 2475 tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired 2476 failed = [] 2477 for e in tmpExprs: 2478 try: 2479 tmpLoc = e.tryParse(instring, tmpLoc) 2480 except ParseException: 2481 failed.append(e) 2482 else: 2483 matchOrder.append(e) 2484 if e in tmpReqd: 2485 tmpReqd.remove(e) 2486 elif e in tmpOpt: 2487 tmpOpt.remove(e) 2488 if len(failed) == len(tmpExprs): 2489 keepMatching = False 2490 2491 if tmpReqd: 2492 missing = ", ".join([str(e) for e in tmpReqd]) 2493 raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing) 2494 2495 # add any unmatched Optionals, in case they have default values defined 2496 matchOrder += list(e for e in self.exprs if isinstance(e, Optional) and e.expr in tmpOpt) 2497 2498 resultlist = [] 2499 for e in matchOrder: 2500 loc, results = e._parse(instring, loc, doActions) 2501 resultlist.append(results) 2502 2503 finalResults = ParseResults([]) 2504 for r in resultlist: 2505 dups = {} 2506 for k in r.keys(): 2507 if k in finalResults.keys(): 2508 tmp = ParseResults(finalResults[k]) 2509 tmp += ParseResults(r[k]) 2510 dups[k] = tmp 2511 finalResults += ParseResults(r) 2512 for k, v in dups.items(): 2513 finalResults[k] = v 2514 return loc, finalResults 2515 2516 def __str__(self): 2517 if hasattr(self, "name"): 2518 return self.name 2519 2520 if self.strRepr is None: 2521 self.strRepr = "{" + " & ".join([str(e) for e in self.exprs]) + "}" 2522 2523 return self.strRepr 2524 2525 def checkRecursion(self, parseElementList): 2526 subRecCheckList = parseElementList[:] + [self] 2527 for e in self.exprs: 2528 e.checkRecursion(subRecCheckList) 2529 2530 2531class ParseElementEnhance(ParserElement): 2532 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.""" 2533 2534 def __init__(self, expr, savelist=False): 2535 super().__init__(savelist) 2536 if isinstance(expr, str): 2537 expr = Literal(expr) 2538 self.expr = expr 2539 self.strRepr = None 2540 if expr is not None: 2541 self.mayIndexError = expr.mayIndexError 2542 self.mayReturnEmpty = expr.mayReturnEmpty 2543 self.setWhitespaceChars(expr.whiteChars) 2544 self.skipWhitespace = expr.skipWhitespace 2545 self.saveAsList = expr.saveAsList 2546 self.callPreparse = expr.callPreparse 2547 self.ignoreExprs.extend(expr.ignoreExprs) 2548 2549 def parseImpl(self, instring, loc, doActions=True): 2550 if self.expr is not None: 2551 return self.expr._parse(instring, loc, doActions, callPreParse=False) 2552 else: 2553 raise ParseException("", loc, self.errmsg, self) 2554 2555 def leaveWhitespace(self): 2556 self.skipWhitespace = False 2557 self.expr = self.expr.copy() 2558 if self.expr is not None: 2559 self.expr.leaveWhitespace() 2560 return self 2561 2562 def ignore(self, other): 2563 if isinstance(other, Suppress): 2564 if other not in self.ignoreExprs: 2565 super().ignore(other) 2566 if self.expr is not None: 2567 self.expr.ignore(self.ignoreExprs[-1]) 2568 else: 2569 super().ignore(other) 2570 if self.expr is not None: 2571 self.expr.ignore(self.ignoreExprs[-1]) 2572 return self 2573 2574 def streamline(self): 2575 super().streamline() 2576 if self.expr is not None: 2577 self.expr.streamline() 2578 return self 2579 2580 def checkRecursion(self, parseElementList): 2581 if self in parseElementList: 2582 raise RecursiveGrammarException(parseElementList+[self]) 2583 subRecCheckList = parseElementList[:] + [self] 2584 if self.expr is not None: 2585 self.expr.checkRecursion(subRecCheckList) 2586 2587 def validate(self, validateTrace=None): 2588 if validateTrace is None: 2589 validateTrace = [] 2590 tmp = validateTrace[:]+[self] 2591 if self.expr is not None: 2592 self.expr.validate(tmp) 2593 self.checkRecursion([]) 2594 2595 def __str__(self): 2596 try: 2597 return super().__str__() 2598 except Exception: 2599 pass 2600 2601 if self.strRepr is None and self.expr is not None: 2602 self.strRepr = f"{self.__class__.__name__}:({str(self.expr)})" 2603 return self.strRepr 2604 2605 2606class FollowedBy(ParseElementEnhance): 2607 """Lookahead matching of the given parse expression. FollowedBy 2608 does *not* advance the parsing position within the input string, it only 2609 verifies that the specified parse expression matches at the current 2610 position. FollowedBy always returns a null token list.""" 2611 2612 def __init__(self, expr): 2613 super().__init__(expr) 2614 self.mayReturnEmpty = True 2615 2616 def parseImpl(self, instring, loc, doActions=True): 2617 self.expr.tryParse(instring, loc) 2618 return loc, [] 2619 2620 2621class NotAny(ParseElementEnhance): 2622 """Lookahead to disallow matching with the given parse expression. NotAny 2623 does *not* advance the parsing position within the input string, it only 2624 verifies that the specified parse expression does *not* match at the current 2625 position. Also, NotAny does *not* skip over leading whitespace. NotAny 2626 always returns a null token list. May be constructed using the '~' operator.""" 2627 2628 def __init__(self, expr): 2629 super().__init__(expr) 2630 self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs 2631 self.mayReturnEmpty = True 2632 self.errmsg = "Found unwanted token, "+str(self.expr) 2633 2634 def parseImpl(self, instring, loc, doActions=True): 2635 try: 2636 self.expr.tryParse(instring, loc) 2637 except (ParseException, IndexError): 2638 pass 2639 else: 2640 exc = self.myException 2641 exc.loc = loc 2642 exc.pstr = instring 2643 raise exc 2644 return loc, [] 2645 2646 def __str__(self): 2647 if hasattr(self, "name"): 2648 return self.name 2649 2650 if self.strRepr is None: 2651 self.strRepr = "~{" + str(self.expr) + "}" 2652 2653 return self.strRepr 2654 2655 2656class ZeroOrMore(ParseElementEnhance): 2657 """Optional repetition of zero or more of the given expression.""" 2658 2659 def __init__(self, expr): 2660 super().__init__(expr) 2661 self.mayReturnEmpty = True 2662 2663 def parseImpl(self, instring, loc, doActions=True): 2664 tokens = [] 2665 try: 2666 loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False) 2667 hasIgnoreExprs = (len(self.ignoreExprs) > 0) 2668 while True: 2669 if hasIgnoreExprs: 2670 preloc = self._skipIgnorables(instring, loc) 2671 else: 2672 preloc = loc 2673 loc, tmptokens = self.expr._parse(instring, preloc, doActions) 2674 if tmptokens or tmptokens.keys(): 2675 tokens += tmptokens 2676 except (ParseException, IndexError): 2677 pass 2678 2679 return loc, tokens 2680 2681 def __str__(self): 2682 if hasattr(self, "name"): 2683 return self.name 2684 2685 if self.strRepr is None: 2686 self.strRepr = "[" + str(self.expr) + "]..." 2687 2688 return self.strRepr 2689 2690 def setResultsName(self, name, listAllMatches=False): 2691 ret = super().setResultsName(name, listAllMatches) 2692 ret.saveAsList = True 2693 return ret 2694 2695 2696class OneOrMore(ParseElementEnhance): 2697 """Repetition of one or more of the given expression.""" 2698 2699 def parseImpl(self, instring, loc, doActions=True): 2700 # must be at least one 2701 loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False) 2702 try: 2703 hasIgnoreExprs = (len(self.ignoreExprs) > 0) 2704 while True: 2705 if hasIgnoreExprs: 2706 preloc = self._skipIgnorables(instring, loc) 2707 else: 2708 preloc = loc 2709 loc, tmptokens = self.expr._parse(instring, preloc, doActions) 2710 if tmptokens or tmptokens.keys(): 2711 tokens += tmptokens 2712 except (ParseException, IndexError): 2713 pass 2714 2715 return loc, tokens 2716 2717 def __str__(self): 2718 if hasattr(self, "name"): 2719 return self.name 2720 2721 if self.strRepr is None: 2722 self.strRepr = "{" + str(self.expr) + "}..." 2723 2724 return self.strRepr 2725 2726 def setResultsName(self, name, listAllMatches=False): 2727 ret = super().setResultsName(name, listAllMatches) 2728 ret.saveAsList = True 2729 return ret 2730 2731 2732class _NullToken: 2733 def __bool__(self): 2734 return False 2735 __nonzero__ = __bool__ 2736 2737 def __str__(self): 2738 return "" 2739 2740 2741_optionalNotMatched = _NullToken() 2742 2743 2744class Optional(ParseElementEnhance): 2745 """Optional matching of the given expression. 2746 A default return string can also be specified, if the optional expression 2747 is not found. 2748 """ 2749 2750 def __init__(self, exprs, default=_optionalNotMatched): 2751 super().__init__(exprs, savelist=False) 2752 self.defaultValue = default 2753 self.mayReturnEmpty = True 2754 2755 def parseImpl(self, instring, loc, doActions=True): 2756 try: 2757 loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False) 2758 except (ParseException, IndexError): 2759 if self.defaultValue is not _optionalNotMatched: 2760 if self.expr.resultsName: 2761 tokens = ParseResults([self.defaultValue]) 2762 tokens[self.expr.resultsName] = self.defaultValue 2763 else: 2764 tokens = [self.defaultValue] 2765 else: 2766 tokens = [] 2767 return loc, tokens 2768 2769 def __str__(self): 2770 if hasattr(self, "name"): 2771 return self.name 2772 2773 if self.strRepr is None: 2774 self.strRepr = "[" + str(self.expr) + "]" 2775 2776 return self.strRepr 2777 2778 2779class SkipTo(ParseElementEnhance): 2780 """Token for skipping over all undefined text until the matched expression is found. 2781 If include is set to true, the matched expression is also consumed. The ignore 2782 argument is used to define grammars (typically quoted strings and comments) that 2783 might contain false matches. 2784 """ 2785 2786 def __init__(self, other, include=False, ignore=None): 2787 super().__init__(other) 2788 if ignore is not None: 2789 self.expr = self.expr.copy() 2790 self.expr.ignore(ignore) 2791 self.mayReturnEmpty = True 2792 self.mayIndexError = False 2793 self.includeMatch = include 2794 self.asList = False 2795 self.errmsg = "No match found for "+str(self.expr) 2796 2797 def parseImpl(self, instring, loc, doActions=True): 2798 startLoc = loc 2799 instrlen = len(instring) 2800 expr = self.expr 2801 while loc <= instrlen: 2802 try: 2803 loc = expr._skipIgnorables(instring, loc) 2804 expr._parse(instring, loc, doActions=False, callPreParse=False) 2805 if self.includeMatch: 2806 skipText = instring[startLoc:loc] 2807 loc, mat = expr._parse(instring, loc, doActions, callPreParse=False) 2808 if mat: 2809 skipRes = ParseResults(skipText) 2810 skipRes += mat 2811 return loc, [skipRes] 2812 else: 2813 return loc, [skipText] 2814 else: 2815 return loc, [instring[startLoc:loc]] 2816 except (ParseException, IndexError): 2817 loc += 1 2818 exc = self.myException 2819 exc.loc = loc 2820 exc.pstr = instring 2821 raise exc 2822 2823 2824class Forward(ParseElementEnhance): 2825 """Forward declaration of an expression to be defined later - 2826 used for recursive grammars, such as algebraic infix notation. 2827 When the expression is known, it is assigned to the Forward variable using the '<<' operator. 2828 2829 Note: take care when assigning to Forward not to overlook precedence of operators. 2830 Specifically, '|' has a lower precedence than '<<', so that:: 2831 fwdExpr << a | b | c 2832 will actually be evaluated as:: 2833 (fwdExpr << a) | b | c 2834 thereby leaving b and c out as parseable alternatives. It is recommended that you 2835 explicitly group the values inserted into the Forward:: 2836 fwdExpr << (a | b | c) 2837 """ 2838 2839 def __init__(self, other=None): 2840 super().__init__(other, savelist=False) 2841 2842 def __lshift__(self, other): 2843 if isinstance(other, str): 2844 other = Literal(other) 2845 self.expr = other 2846 self.mayReturnEmpty = other.mayReturnEmpty 2847 self.strRepr = None 2848 self.mayIndexError = self.expr.mayIndexError 2849 self.mayReturnEmpty = self.expr.mayReturnEmpty 2850 self.setWhitespaceChars(self.expr.whiteChars) 2851 self.skipWhitespace = self.expr.skipWhitespace 2852 self.saveAsList = self.expr.saveAsList 2853 self.ignoreExprs.extend(self.expr.ignoreExprs) 2854 return None 2855 2856 def leaveWhitespace(self): 2857 self.skipWhitespace = False 2858 return self 2859 2860 def streamline(self): 2861 if not self.streamlined: 2862 self.streamlined = True 2863 if self.expr is not None: 2864 self.expr.streamline() 2865 return self 2866 2867 def validate(self, validateTrace=None): 2868 if validateTrace is None: 2869 validateTrace = [] 2870 if self not in validateTrace: 2871 tmp = validateTrace[:]+[self] 2872 if self.expr is not None: 2873 self.expr.validate(tmp) 2874 self.checkRecursion([]) 2875 2876 def __str__(self): 2877 if hasattr(self, "name"): 2878 return self.name 2879 2880 self.__class__ = _ForwardNoRecurse 2881 try: 2882 if self.expr is not None: 2883 retString = str(self.expr) 2884 else: 2885 retString = "None" 2886 finally: 2887 self.__class__ = Forward 2888 return "Forward: "+retString 2889 2890 def copy(self): 2891 if self.expr is not None: 2892 return super().copy() 2893 else: 2894 ret = Forward() 2895 ret << self 2896 return ret 2897 2898 2899class _ForwardNoRecurse(Forward): 2900 def __str__(self): 2901 return "..." 2902 2903 2904class TokenConverter(ParseElementEnhance): 2905 """Abstract subclass of ParseExpression, for converting parsed results.""" 2906 2907 def __init__(self, expr, savelist=False): 2908 super().__init__(expr) # , savelist ) 2909 self.saveAsList = False 2910 2911 2912class Upcase(TokenConverter): 2913 """Converter to upper case all matching tokens.""" 2914 2915 def __init__(self, *args): 2916 super().__init__(*args) 2917 warnings.warn( 2918 "Upcase class is deprecated, use upcaseTokens parse action instead", 2919 DeprecationWarning, 2920 stacklevel=2) 2921 2922 def postParse(self, instring, loc, tokenlist): 2923 return list(map(string.upper, tokenlist)) 2924 2925 2926class Combine(TokenConverter): 2927 """Converter to concatenate all matching tokens to a single string. 2928 By default, the matching patterns must also be contiguous in the input string; 2929 this can be disabled by specifying 'adjacent=False' in the constructor. 2930 """ 2931 2932 def __init__(self, expr, joinString="", adjacent=True): 2933 super().__init__(expr) 2934 # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself 2935 if adjacent: 2936 self.leaveWhitespace() 2937 self.adjacent = adjacent 2938 self.skipWhitespace = True 2939 self.joinString = joinString 2940 2941 def ignore(self, other): 2942 if self.adjacent: 2943 ParserElement.ignore(self, other) 2944 else: 2945 super().ignore(other) 2946 return self 2947 2948 def postParse(self, instring, loc, tokenlist): 2949 retToks = tokenlist.copy() 2950 del retToks[:] 2951 retToks += ParseResults(["".join(tokenlist._asStringList(self.joinString))], modal=self.modalResults) 2952 2953 if self.resultsName and len(retToks.keys()) > 0: 2954 return [retToks] 2955 else: 2956 return retToks 2957 2958 2959class Group(TokenConverter): 2960 """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions.""" 2961 2962 def __init__(self, expr): 2963 super().__init__(expr) 2964 self.saveAsList = True 2965 2966 def postParse(self, instring, loc, tokenlist): 2967 return [tokenlist] 2968 2969 2970class Dict(TokenConverter): 2971 """Converter to return a repetitive expression as a list, but also as a dictionary. 2972 Each element can also be referenced using the first token in the expression as its key. 2973 Useful for tabular report scraping when the first column can be used as a item key. 2974 """ 2975 2976 def __init__(self, exprs): 2977 super().__init__(exprs) 2978 self.saveAsList = True 2979 2980 def postParse(self, instring, loc, tokenlist): 2981 for i, tok in enumerate(tokenlist): 2982 if len(tok) == 0: 2983 continue 2984 ikey = tok[0] 2985 if isinstance(ikey, int): 2986 ikey = str(tok[0]).strip() 2987 if len(tok) == 1: 2988 tokenlist[ikey] = _ParseResultsWithOffset("", i) 2989 elif len(tok) == 2 and not isinstance(tok[1], ParseResults): 2990 tokenlist[ikey] = _ParseResultsWithOffset(tok[1], i) 2991 else: 2992 dictvalue = tok.copy() # ParseResults(i) 2993 del dictvalue[0] 2994 if len(dictvalue) != 1 or (isinstance(dictvalue, ParseResults) and dictvalue.keys()): 2995 tokenlist[ikey] = _ParseResultsWithOffset(dictvalue, i) 2996 else: 2997 tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0], i) 2998 2999 if self.resultsName: 3000 return [tokenlist] 3001 else: 3002 return tokenlist 3003 3004 3005class Suppress(TokenConverter): 3006 """Converter for ignoring the results of a parsed expression.""" 3007 3008 def postParse(self, instring, loc, tokenlist): 3009 return [] 3010 3011 def suppress(self): 3012 return self 3013 3014 3015class OnlyOnce: 3016 """Wrapper for parse actions, to ensure they are only called once.""" 3017 3018 def __init__(self, methodCall): 3019 self.callable = ParserElement._normalizeParseActionArgs(methodCall) 3020 self.called = False 3021 3022 def __call__(self, s, l, t): 3023 if not self.called: 3024 results = self.callable(s, l, t) 3025 self.called = True 3026 return results 3027 raise ParseException(s, l, "") 3028 3029 def reset(self): 3030 self.called = False 3031 3032 3033def traceParseAction(f): 3034 """Decorator for debugging parse actions.""" 3035 f = ParserElement._normalizeParseActionArgs(f) 3036 3037 def z(*paArgs): 3038 thisFunc = f.func_name 3039 s, l, t = paArgs[-3:] 3040 if len(paArgs) > 3: 3041 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc 3042 sys.stderr.write(">>entering %s(line: '%s', %d, %s)\n" % (thisFunc, line(l, s), l, t)) 3043 try: 3044 ret = f(*paArgs) 3045 except Exception as exc: 3046 sys.stderr.write(f"<<leaving {thisFunc} (exception: {exc})\n") 3047 raise 3048 sys.stderr.write(f"<<leaving {thisFunc} (ret: {ret})\n") 3049 return ret 3050 try: 3051 z.__name__ = f.__name__ 3052 except AttributeError: 3053 pass 3054 return z 3055 3056# 3057# global helpers 3058# 3059 3060 3061def delimitedList(expr, delim=",", combine=False): 3062 """Helper to define a delimited list of expressions - the delimiter defaults to ','. 3063 By default, the list elements and delimiters can have intervening whitespace, and 3064 comments, but this can be overridden by passing 'combine=True' in the constructor. 3065 If combine is set to True, the matching tokens are returned as a single token 3066 string, with the delimiters included; otherwise, the matching tokens are returned 3067 as a list of tokens, with the delimiters suppressed. 3068 """ 3069 dlName = str(expr)+" ["+str(delim)+" "+str(expr)+"]..." 3070 if combine: 3071 return Combine(expr + ZeroOrMore(delim + expr)).setName(dlName) 3072 else: 3073 return (expr + ZeroOrMore(Suppress(delim) + expr)).setName(dlName) 3074 3075 3076def countedArray(expr): 3077 """Helper to define a counted list of expressions. 3078 This helper defines a pattern of the form:: 3079 integer expr expr expr... 3080 where the leading integer tells how many expr expressions follow. 3081 The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed. 3082 """ 3083 arrayExpr = Forward() 3084 3085 def countFieldParseAction(s, l, t): 3086 n = int(t[0]) 3087 arrayExpr << (n and Group(And([expr]*n)) or Group(empty)) 3088 return [] 3089 return (Word(nums).setName("arrayLen").setParseAction(countFieldParseAction, callDuringTry=True) + arrayExpr) 3090 3091 3092def _flatten(L): 3093 if not isinstance(L, list): 3094 return [L] 3095 if L == []: 3096 return L 3097 return _flatten(L[0]) + _flatten(L[1:]) 3098 3099 3100def matchPreviousLiteral(expr): 3101 """Helper to define an expression that is indirectly defined from 3102 the tokens matched in a previous expression, that is, it looks 3103 for a 'repeat' of a previous expression. For example:: 3104 first = Word(nums) 3105 second = matchPreviousLiteral(first) 3106 matchExpr = first + ":" + second 3107 will match "1:1", but not "1:2". Because this matches a 3108 previous literal, will also match the leading "1:1" in "1:10". 3109 If this is not desired, use matchPreviousExpr. 3110 Do *not* use with packrat parsing enabled. 3111 """ 3112 rep = Forward() 3113 3114 def copyTokenToRepeater(s, l, t): 3115 if t: 3116 if len(t) == 1: 3117 rep << t[0] 3118 else: 3119 # flatten t tokens 3120 tflat = _flatten(t.asList()) 3121 rep << And([Literal(tt) for tt in tflat]) 3122 else: 3123 rep << Empty() 3124 expr.addParseAction(copyTokenToRepeater, callDuringTry=True) 3125 return rep 3126 3127 3128def matchPreviousExpr(expr): 3129 """Helper to define an expression that is indirectly defined from 3130 the tokens matched in a previous expression, that is, it looks 3131 for a 'repeat' of a previous expression. For example:: 3132 first = Word(nums) 3133 second = matchPreviousExpr(first) 3134 matchExpr = first + ":" + second 3135 will match "1:1", but not "1:2". Because this matches by 3136 expressions, will *not* match the leading "1:1" in "1:10"; 3137 the expressions are evaluated first, and then compared, so 3138 "1" is compared with "10". 3139 Do *not* use with packrat parsing enabled. 3140 """ 3141 rep = Forward() 3142 e2 = expr.copy() 3143 rep << e2 3144 3145 def copyTokenToRepeater(s, l, t): 3146 matchTokens = _flatten(t.asList()) 3147 3148 def mustMatchTheseTokens(s, l, t): 3149 theseTokens = _flatten(t.asList()) 3150 if theseTokens != matchTokens: 3151 raise ParseException("", 0, "") 3152 rep.setParseAction(mustMatchTheseTokens, callDuringTry=True) 3153 expr.addParseAction(copyTokenToRepeater, callDuringTry=True) 3154 return rep 3155 3156 3157def _escapeRegexRangeChars(s): 3158 # ~ escape these chars: ^-] 3159 for c in r"\^-]": 3160 s = s.replace(c, "\\"+c) 3161 s = s.replace("\n", r"\n") 3162 s = s.replace("\t", r"\t") 3163 return str(s) 3164 3165 3166def oneOf(strs, caseless=False, useRegex=True): 3167 """Helper to quickly define a set of alternative Literals, and makes sure to do 3168 longest-first testing when there is a conflict, regardless of the input order, 3169 but returns a MatchFirst for best performance. 3170 3171 Parameters: 3172 - strs - a string of space-delimited literals, or a list of string literals 3173 - caseless - (default=False) - treat all literals as caseless 3174 - useRegex - (default=True) - as an optimization, will generate a Regex 3175 object; otherwise, will generate a MatchFirst object (if caseless=True, or 3176 if creating a Regex raises an exception) 3177 """ 3178 if caseless: 3179 isequal = (lambda a, b: a.upper() == b.upper()) 3180 masks = (lambda a, b: b.upper().startswith(a.upper())) 3181 parseElementClass = CaselessLiteral 3182 else: 3183 isequal = (lambda a, b: a == b) 3184 masks = (lambda a, b: b.startswith(a)) 3185 parseElementClass = Literal 3186 3187 if isinstance(strs, (list, tuple)): 3188 symbols = strs[:] 3189 elif isinstance(strs, str): 3190 symbols = strs.split() 3191 else: 3192 warnings.warn( 3193 "Invalid argument to oneOf, expected string or list", 3194 SyntaxWarning, 3195 stacklevel=2) 3196 3197 i = 0 3198 while i < len(symbols)-1: 3199 cur = symbols[i] 3200 for j, other in enumerate(symbols[i+1:]): 3201 if isequal(other, cur): 3202 del symbols[i+j+1] 3203 break 3204 elif masks(cur, other): 3205 del symbols[i+j+1] 3206 symbols.insert(i, other) 3207 cur = other 3208 break 3209 else: 3210 i += 1 3211 3212 if not caseless and useRegex: 3213 # ~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )) 3214 try: 3215 if len(symbols) == len("".join(symbols)): 3216 return Regex("[%s]" % "".join([_escapeRegexRangeChars(sym) for sym in symbols])) 3217 else: 3218 return Regex("|".join([re.escape(sym) for sym in symbols])) 3219 except Exception: 3220 warnings.warn( 3221 "Exception creating Regex for oneOf, building MatchFirst", 3222 SyntaxWarning, 3223 stacklevel=2) 3224 3225 # last resort, just use MatchFirst 3226 return MatchFirst([parseElementClass(sym) for sym in symbols]) 3227 3228 3229def dictOf(key, value): 3230 """Helper to easily and clearly define a dictionary by specifying the respective patterns 3231 for the key and value. Takes care of defining the Dict, ZeroOrMore, and Group tokens 3232 in the proper order. The key pattern can include delimiting markers or punctuation, 3233 as long as they are suppressed, thereby leaving the significant key text. The value 3234 pattern can include named results, so that the Dict results can include named token 3235 fields. 3236 """ 3237 return Dict(ZeroOrMore(Group(key + value))) 3238 3239 3240# convenience constants for positional expressions 3241empty = Empty().setName("empty") 3242lineStart = LineStart().setName("lineStart") 3243lineEnd = LineEnd().setName("lineEnd") 3244stringStart = StringStart().setName("stringStart") 3245stringEnd = StringEnd().setName("stringEnd") 3246 3247_escapedPunc = Word(_bslash, r"\[]-*.$+^?()~ ", exact=2).setParseAction(lambda s, l, t: t[0][1]) 3248_printables_less_backslash = "".join([c for c in printables if c not in r"\]"]) 3249_escapedHexChar = Combine(Suppress(_bslash + "0x") + Word(hexnums)).setParseAction(lambda s, l, t: chr(int(t[0], 16))) 3250_escapedOctChar = Combine(Suppress(_bslash) + Word("0", "01234567")).setParseAction(lambda s, l, t: chr(int(t[0], 8))) 3251_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash, exact=1) 3252_charRange = Group(_singleChar + Suppress("-") + _singleChar) 3253_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group(OneOrMore(_charRange | _singleChar)).setResultsName("body") + "]" 3254 3255 3256def _expanded(p): 3257 return (isinstance(p, ParseResults) and ''.join([chr(c) for c in range(ord(p[0]), ord(p[1])+1)]) or p) 3258 3259 3260def srange(s): 3261 r"""Helper to easily define string ranges for use in Word construction. Borrows 3262 syntax from regexp '[]' string range definitions:: 3263 srange("[0-9]") -> "0123456789" 3264 srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" 3265 srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" 3266 The input string must be enclosed in []'s, and the returned string is the expanded 3267 character set joined into a single string. 3268 The values enclosed in the []'s may be:: 3269 a single character 3270 an escaped character with a leading backslash (such as \- or \]) 3271 an escaped hex character with a leading '\0x' (\0x21, which is a '!' character) 3272 an escaped octal character with a leading '\0' (\041, which is a '!' character) 3273 a range of any of the above, separated by a dash ('a-z', etc.) 3274 any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.) 3275 """ 3276 try: 3277 return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body]) 3278 except Exception: 3279 return "" 3280 3281 3282def matchOnlyAtCol(n): 3283 """Helper method for defining parse actions that require matching at a specific 3284 column in the input text. 3285 """ 3286 def verifyCol(strg, locn, toks): 3287 if col(locn, strg) != n: 3288 raise ParseException(strg, locn, "matched token not at column %d" % n) 3289 return verifyCol 3290 3291 3292def replaceWith(replStr): 3293 """Helper method for common parse actions that simply return a literal value. Especially 3294 useful when used with transformString(). 3295 """ 3296 def _replFunc(*args): 3297 return [replStr] 3298 return _replFunc 3299 3300 3301def removeQuotes(s, l, t): 3302 """Helper parse action for removing quotation marks from parsed quoted strings. 3303 To use, add this parse action to quoted string using:: 3304 quotedString.setParseAction( removeQuotes ) 3305 """ 3306 return t[0][1:-1] 3307 3308 3309def upcaseTokens(s, l, t): 3310 """Helper parse action to convert tokens to upper case.""" 3311 return [tt.upper() for tt in map(str, t)] 3312 3313 3314def downcaseTokens(s, l, t): 3315 """Helper parse action to convert tokens to lower case.""" 3316 return [tt.lower() for tt in map(str, t)] 3317 3318 3319def keepOriginalText(s, startLoc, t): 3320 """Helper parse action to preserve original parsed text, 3321 overriding any nested parse actions.""" 3322 try: 3323 endloc = getTokensEndLoc() 3324 except ParseException: 3325 raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action") 3326 del t[:] 3327 t += ParseResults(s[startLoc:endloc]) 3328 return t 3329 3330 3331def getTokensEndLoc(): 3332 """Method to be called from within a parse action to determine the end 3333 location of the parsed tokens.""" 3334 import inspect 3335 fstack = inspect.stack() 3336 try: 3337 # search up the stack (through intervening argument normalizers) for correct calling routine 3338 for f in fstack[2:]: 3339 if f[3] == "_parseNoCache": 3340 endloc = f[0].f_locals["loc"] 3341 return endloc 3342 else: 3343 raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action") 3344 finally: 3345 del fstack 3346 3347 3348def _makeTags(tagStr, xml): 3349 """Internal helper to construct opening and closing tag expressions, given a tag name""" 3350 if isinstance(tagStr, str): 3351 resname = tagStr 3352 tagStr = Keyword(tagStr, caseless=not xml) 3353 else: 3354 resname = tagStr.name 3355 3356 tagAttrName = Word(alphas, alphanums+"_-:") 3357 if xml: 3358 tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes) 3359 openTag = Suppress("<") + tagStr + \ 3360 Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue))) + \ 3361 Optional("/", default=[False]).setResultsName("empty").setParseAction(lambda s, l, t: t[0] == '/') + Suppress(">") 3362 else: 3363 printablesLessRAbrack = "".join([c for c in printables if c not in ">"]) 3364 tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printablesLessRAbrack) 3365 openTag = Suppress("<") + tagStr + \ 3366 Dict(ZeroOrMore(Group(tagAttrName.setParseAction(downcaseTokens) + Optional(Suppress("=") + tagAttrValue)))) + \ 3367 Optional("/", default=[False]).setResultsName("empty").setParseAction(lambda s, l, t: t[0] == '/') + Suppress(">") 3368 closeTag = Combine(_L("</") + tagStr + ">") 3369 3370 openTag = openTag.setResultsName("start"+"".join(resname.replace(":", " ").title().split())).setName("<%s>" % tagStr) 3371 closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":", " ").title().split())).setName("</%s>" % tagStr) 3372 3373 return openTag, closeTag 3374 3375 3376def makeHTMLTags(tagStr): 3377 """Helper to construct opening and closing tag expressions for HTML, given a tag name""" 3378 return _makeTags(tagStr, False) 3379 3380 3381def makeXMLTags(tagStr): 3382 """Helper to construct opening and closing tag expressions for XML, given a tag name""" 3383 return _makeTags(tagStr, True) 3384 3385 3386def withAttribute(*args, **attrDict): 3387 """Helper to create a validating parse action to be used with start tags created 3388 with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag 3389 with a required attribute value, to avoid false matches on common tags such as 3390 <TD> or <DIV>. 3391 3392 Call withAttribute with a series of attribute names and values. Specify the list 3393 of filter attributes names and values as: 3394 - keyword arguments, as in (class="Customer",align="right"), or 3395 - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) 3396 For attribute names with a namespace prefix, you must use the second form. Attribute 3397 names are matched insensitive to upper/lower case. 3398 3399 To verify that the attribute exists, but without specifying a value, pass 3400 withAttribute.ANY_VALUE as the value. 3401 """ 3402 if args: 3403 attrs = args[:] 3404 else: 3405 attrs = attrDict.items() 3406 attrs = [(k, v) for k, v in attrs] 3407 3408 def pa(s, l, tokens): 3409 for attrName, attrValue in attrs: 3410 if attrName not in tokens: 3411 raise ParseException(s, l, "no matching attribute " + attrName) 3412 if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: 3413 raise ParseException(s, l, f"attribute '{attrName}' has value '{tokens[attrName]}', must be '{attrValue}'") 3414 return pa 3415 3416 3417withAttribute.ANY_VALUE = object() 3418 3419opAssoc = _Constants() 3420opAssoc.LEFT = object() 3421opAssoc.RIGHT = object() 3422 3423 3424def operatorPrecedence(baseExpr, opList): 3425 """Helper method for constructing grammars of expressions made up of 3426 operators working in a precedence hierarchy. Operators may be unary or 3427 binary, left- or right-associative. Parse actions can also be attached 3428 to operator expressions. 3429 3430 Parameters: 3431 - baseExpr - expression representing the most basic element for the nested 3432 - opList - list of tuples, one for each operator precedence level in the 3433 expression grammar; each tuple is of the form 3434 (opExpr, numTerms, rightLeftAssoc, parseAction), where: 3435 - opExpr is the pyparsing expression for the operator; 3436 may also be a string, which will be converted to a Literal; 3437 if numTerms is 3, opExpr is a tuple of two expressions, for the 3438 two operators separating the 3 terms 3439 - numTerms is the number of terms for this operator (must 3440 be 1, 2, or 3) 3441 - rightLeftAssoc is the indicator whether the operator is 3442 right or left associative, using the pyparsing-defined 3443 constants opAssoc.RIGHT and opAssoc.LEFT. 3444 - parseAction is the parse action to be associated with 3445 expressions matching this operator expression (the 3446 parse action tuple member may be omitted) 3447 """ 3448 ret = Forward() 3449 lastExpr = baseExpr | (Suppress('(') + ret + Suppress(')')) 3450 for operDef in opList: 3451 opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4] 3452 if arity == 3: 3453 if opExpr is None or len(opExpr) != 2: 3454 raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") 3455 opExpr1, opExpr2 = opExpr 3456 thisExpr = Forward() # .setName("expr%d" % i) 3457 if rightLeftAssoc == opAssoc.LEFT: 3458 if arity == 1: 3459 matchExpr = FollowedBy(lastExpr + opExpr) + Group(lastExpr + OneOrMore(opExpr)) 3460 elif arity == 2: 3461 if opExpr is not None: 3462 matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group(lastExpr + OneOrMore(opExpr + lastExpr)) 3463 else: 3464 matchExpr = FollowedBy(lastExpr+lastExpr) + Group(lastExpr + OneOrMore(lastExpr)) 3465 elif arity == 3: 3466 matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ 3467 Group(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) 3468 else: 3469 raise ValueError("operator must be unary (1), binary (2), or ternary (3)") 3470 elif rightLeftAssoc == opAssoc.RIGHT: 3471 if arity == 1: 3472 # try to avoid LR with this extra test 3473 if not isinstance(opExpr, Optional): 3474 opExpr = Optional(opExpr) 3475 matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group(opExpr + thisExpr) 3476 elif arity == 2: 3477 if opExpr is not None: 3478 matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group(lastExpr + OneOrMore(opExpr + thisExpr)) 3479 else: 3480 matchExpr = FollowedBy(lastExpr + thisExpr) + Group(lastExpr + OneOrMore(thisExpr)) 3481 elif arity == 3: 3482 matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ 3483 Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) 3484 else: 3485 raise ValueError("operator must be unary (1), binary (2), or ternary (3)") 3486 else: 3487 raise ValueError("operator must indicate right or left associativity") 3488 if pa: 3489 matchExpr.setParseAction(pa) 3490 thisExpr << (matchExpr | lastExpr) 3491 lastExpr = thisExpr 3492 ret << lastExpr 3493 return ret 3494 3495 3496dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") 3497sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") 3498quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes") 3499unicodeString = Combine(_L('u') + quotedString.copy()) 3500 3501 3502def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString): 3503 """Helper method for defining nested lists enclosed in opening and closing 3504 delimiters ("(" and ")" are the default). 3505 3506 Parameters: 3507 - opener - opening character for a nested list (default="("); can also be a pyparsing expression 3508 - closer - closing character for a nested list (default=")"); can also be a pyparsing expression 3509 - content - expression for items within the nested lists (default=None) 3510 - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString) 3511 3512 If an expression is not provided for the content argument, the nested 3513 expression will capture all whitespace-delimited content between delimiters 3514 as a list of separate values. 3515 3516 Use the ignoreExpr argument to define expressions that may contain 3517 opening or closing characters that should not be treated as opening 3518 or closing characters for nesting, such as quotedString or a comment 3519 expression. Specify multiple expressions using an Or or MatchFirst. 3520 The default is quotedString, but if no expressions are to be ignored, 3521 then pass None for this argument. 3522 """ 3523 if opener == closer: 3524 raise ValueError("opening and closing strings cannot be the same") 3525 if content is None: 3526 if isinstance(opener, str) and isinstance(closer, str): 3527 if ignoreExpr is not None: 3528 content = (Combine(OneOrMore(~ignoreExpr + CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS, exact=1))) 3529 .setParseAction(lambda t: t[0].strip())) 3530 else: 3531 content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS).setParseAction(lambda t: t[0].strip())) 3532 else: 3533 raise ValueError("opening and closing arguments must be strings if no content expression is given") 3534 ret = Forward() 3535 if ignoreExpr is not None: 3536 ret << Group(Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)) 3537 else: 3538 ret << Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer)) 3539 return ret 3540 3541 3542def indentedBlock(blockStatementExpr, indentStack, indent=True): 3543 """Helper method for defining space-delimited indentation blocks, such as 3544 those used to define block statements in Python source code. 3545 3546 Parameters: 3547 - blockStatementExpr - expression defining syntax of statement that 3548 is repeated within the indented block 3549 - indentStack - list created by caller to manage indentation stack 3550 (multiple statementWithIndentedBlock expressions within a single grammar 3551 should share a common indentStack) 3552 - indent - boolean indicating whether block must be indented beyond the 3553 the current level; set to False for block of left-most statements 3554 (default=True) 3555 3556 A valid block must contain at least one blockStatement. 3557 """ 3558 def checkPeerIndent(s, l, t): 3559 if l >= len(s): 3560 return 3561 curCol = col(l, s) 3562 if curCol != indentStack[-1]: 3563 if curCol > indentStack[-1]: 3564 raise ParseFatalException(s, l, "illegal nesting") 3565 raise ParseException(s, l, "not a peer entry") 3566 3567 def checkSubIndent(s, l, t): 3568 curCol = col(l, s) 3569 if curCol > indentStack[-1]: 3570 indentStack.append(curCol) 3571 else: 3572 raise ParseException(s, l, "not a subentry") 3573 3574 def checkUnindent(s, l, t): 3575 if l >= len(s): 3576 return 3577 curCol = col(l, s) 3578 if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): 3579 raise ParseException(s, l, "not an unindent") 3580 indentStack.pop() 3581 3582 NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()) 3583 INDENT = Empty() + Empty().setParseAction(checkSubIndent) 3584 PEER = Empty().setParseAction(checkPeerIndent) 3585 UNDENT = Empty().setParseAction(checkUnindent) 3586 if indent: 3587 smExpr = Group( 3588 Optional(NL) + FollowedBy(blockStatementExpr) 3589 + INDENT + (OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))) + UNDENT) 3590 else: 3591 smExpr = Group(Optional(NL) + (OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL)))) 3592 blockStatementExpr.ignore("\\" + LineEnd()) 3593 return smExpr 3594 3595 3596alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") 3597punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") 3598 3599anyOpenTag, anyCloseTag = makeHTMLTags(Word(alphas, alphanums+"_:")) 3600commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") + ";") 3601_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(), "><& '")) 3602 3603 3604def replaceHTMLEntity(t): 3605 return t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None 3606 3607 3608# it's easy to get these comment structures wrong - they're very common, so may as well make them available 3609cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment") 3610 3611htmlComment = Regex(r"<!--[\s\S]*?-->") 3612restOfLine = Regex(r".*").leaveWhitespace() 3613dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment") 3614cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment") 3615 3616javaStyleComment = cppStyleComment 3617pythonStyleComment = Regex(r"#.*").setName("Python style comment") 3618_noncomma = "".join([c for c in printables if c != ","]) 3619_commasepitem = Combine(OneOrMore(Word(_noncomma) + Optional(Word(" \t") + ~Literal(",") + ~LineEnd()))).streamline().setName("commaItem") 3620commaSeparatedList = delimitedList(Optional(quotedString | _commasepitem, default="")).setName("commaSeparatedList") 3621 3622 3623if __name__ == "__main__": 3624 3625 def test(teststring): 3626 try: 3627 tokens = simpleSQL.parseString(teststring) 3628 tokenlist = tokens.asList() 3629 print(teststring + "->" + str(tokenlist)) 3630 print("tokens = " + str(tokens)) 3631 print("tokens.columns = " + str(tokens.columns)) 3632 print("tokens.tables = " + str(tokens.tables)) 3633 print(tokens.asXML("SQL", True)) 3634 except ParseBaseException as err: 3635 print(teststring + "->") 3636 print(err.line) 3637 print(" "*(err.column-1) + "^") 3638 print(err) 3639 print() 3640 3641 selectToken = CaselessLiteral("select") 3642 fromToken = CaselessLiteral("from") 3643 3644 ident = Word(alphas, alphanums + "_$") 3645 columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) 3646 columnNameList = Group(delimitedList(columnName)) # .setName("columns") 3647 tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) 3648 tableNameList = Group(delimitedList(tableName)) # .setName("tables") 3649 simpleSQL = ( 3650 selectToken 3651 + ('*' | columnNameList).setResultsName("columns") 3652 + fromToken 3653 + tableNameList.setResultsName("tables")) 3654 3655 test("SELECT * from XYZZY, ABC") 3656 test("select * from SYS.XYZZY") 3657 test("Select A from Sys.dual") 3658 test("Select AA,BB,CC from Sys.dual") 3659 test("Select A, B, C from Sys.dual") 3660 test("Select A, B, C from Sys.dual") 3661 test("Xelect A, B, C from Sys.dual") 3662 test("Select A, B, C frox Sys.dual") 3663 test("Select") 3664 test("Select ^^^ frox Sys.dual") 3665 test("Select A, B, C from Sys.dual, Table2 ") 3666