1# -*- coding: utf-8 -*- 2# module pyparsing.py 3# 4# Copyright (c) 2003-2019 Paul T. McGuire 5# 6# Permission is hereby granted, free of charge, to any person obtaining 7# a copy of this software and associated documentation files (the 8# "Software"), to deal in the Software without restriction, including 9# without limitation the rights to use, copy, modify, merge, publish, 10# distribute, sublicense, and/or sell copies of the Software, and to 11# permit persons to whom the Software is furnished to do so, subject to 12# the following conditions: 13# 14# The above copyright notice and this permission notice shall be 15# included in all copies or substantial portions of the Software. 16# 17# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 21# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 22# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 23# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24# 25 26__doc__ = \ 27""" 28pyparsing module - Classes and methods to define and execute parsing grammars 29============================================================================= 30 31The pyparsing module is an alternative approach to creating and 32executing simple grammars, vs. the traditional lex/yacc approach, or the 33use of regular expressions. With pyparsing, you don't need to learn 34a new syntax for defining grammars or matching expressions - the parsing 35module provides a library of classes that you use to construct the 36grammar directly in Python. 37 38Here is a program to parse "Hello, World!" (or any greeting of the form 39``"<salutation>, <addressee>!"``), built up using :class:`Word`, 40:class:`Literal`, and :class:`And` elements 41(the :class:`'+'<ParserElement.__add__>` operators create :class:`And` expressions, 42and the strings are auto-converted to :class:`Literal` expressions):: 43 44 from pyparsing import Word, alphas 45 46 # define grammar of a greeting 47 greet = Word(alphas) + "," + Word(alphas) + "!" 48 49 hello = "Hello, World!" 50 print (hello, "->", greet.parseString(hello)) 51 52The program outputs the following:: 53 54 Hello, World! -> ['Hello', ',', 'World', '!'] 55 56The Python representation of the grammar is quite readable, owing to the 57self-explanatory class names, and the use of '+', '|' and '^' operators. 58 59The :class:`ParseResults` object returned from 60:class:`ParserElement.parseString` can be 61accessed as a nested list, a dictionary, or an object with named 62attributes. 63 64The pyparsing module handles some of the problems that are typically 65vexing when writing text parsers: 66 67 - extra or missing whitespace (the above program will also handle 68 "Hello,World!", "Hello , World !", etc.) 69 - quoted strings 70 - embedded comments 71 72 73Getting Started - 74----------------- 75Visit the classes :class:`ParserElement` and :class:`ParseResults` to 76see the base classes that most other pyparsing 77classes inherit from. Use the docstrings for examples of how to: 78 79 - construct literal match expressions from :class:`Literal` and 80 :class:`CaselessLiteral` classes 81 - construct character word-group expressions using the :class:`Word` 82 class 83 - see how to create repetitive expressions using :class:`ZeroOrMore` 84 and :class:`OneOrMore` classes 85 - use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`, 86 and :class:`'&'<Each>` operators to combine simple expressions into 87 more complex ones 88 - associate names with your parsed results using 89 :class:`ParserElement.setResultsName` 90 - access the parsed data, which is returned as a :class:`ParseResults` 91 object 92 - find some helpful expression short-cuts like :class:`delimitedList` 93 and :class:`oneOf` 94 - find more useful common expressions in the :class:`pyparsing_common` 95 namespace class 96""" 97 98__version__ = "2.4.7" 99__versionTime__ = "30 Mar 2020 00:43 UTC" 100__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 101 102import string 103from weakref import ref as wkref 104import copy 105import sys 106import warnings 107import re 108import sre_constants 109import collections 110import pprint 111import traceback 112import types 113from datetime import datetime 114from operator import itemgetter 115import itertools 116from functools import wraps 117from contextlib import contextmanager 118 119try: 120 # Python 3 121 from itertools import filterfalse 122except ImportError: 123 from itertools import ifilterfalse as filterfalse 124 125try: 126 from _thread import RLock 127except ImportError: 128 from threading import RLock 129 130try: 131 # Python 3 132 from collections.abc import Iterable 133 from collections.abc import MutableMapping, Mapping 134except ImportError: 135 # Python 2.7 136 from collections import Iterable 137 from collections import MutableMapping, Mapping 138 139try: 140 from collections import OrderedDict as _OrderedDict 141except ImportError: 142 try: 143 from ordereddict import OrderedDict as _OrderedDict 144 except ImportError: 145 _OrderedDict = None 146 147try: 148 from types import SimpleNamespace 149except ImportError: 150 class SimpleNamespace: pass 151 152# version compatibility configuration 153__compat__ = SimpleNamespace() 154__compat__.__doc__ = """ 155 A cross-version compatibility configuration for pyparsing features that will be 156 released in a future version. By setting values in this configuration to True, 157 those features can be enabled in prior versions for compatibility development 158 and testing. 159 160 - collect_all_And_tokens - flag to enable fix for Issue #63 that fixes erroneous grouping 161 of results names when an And expression is nested within an Or or MatchFirst; set to 162 True to enable bugfix released in pyparsing 2.3.0, or False to preserve 163 pre-2.3.0 handling of named results 164""" 165__compat__.collect_all_And_tokens = True 166 167__diag__ = SimpleNamespace() 168__diag__.__doc__ = """ 169Diagnostic configuration (all default to False) 170 - warn_multiple_tokens_in_named_alternation - flag to enable warnings when a results 171 name is defined on a MatchFirst or Or expression with one or more And subexpressions 172 (only warns if __compat__.collect_all_And_tokens is False) 173 - warn_ungrouped_named_tokens_in_collection - flag to enable warnings when a results 174 name is defined on a containing expression with ungrouped subexpressions that also 175 have results names 176 - warn_name_set_on_empty_Forward - flag to enable warnings whan a Forward is defined 177 with a results name, but has no contents defined 178 - warn_on_multiple_string_args_to_oneof - flag to enable warnings whan oneOf is 179 incorrectly called with multiple str arguments 180 - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent 181 calls to ParserElement.setName() 182""" 183__diag__.warn_multiple_tokens_in_named_alternation = False 184__diag__.warn_ungrouped_named_tokens_in_collection = False 185__diag__.warn_name_set_on_empty_Forward = False 186__diag__.warn_on_multiple_string_args_to_oneof = False 187__diag__.enable_debug_on_named_expressions = False 188__diag__._all_names = [nm for nm in vars(__diag__) if nm.startswith("enable_") or nm.startswith("warn_")] 189 190def _enable_all_warnings(): 191 __diag__.warn_multiple_tokens_in_named_alternation = True 192 __diag__.warn_ungrouped_named_tokens_in_collection = True 193 __diag__.warn_name_set_on_empty_Forward = True 194 __diag__.warn_on_multiple_string_args_to_oneof = True 195__diag__.enable_all_warnings = _enable_all_warnings 196 197 198__all__ = ['__version__', '__versionTime__', '__author__', '__compat__', '__diag__', 199 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 200 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 201 'PrecededBy', 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 202 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 203 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 204 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 205 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 'Char', 206 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 207 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 208 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 209 'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', 210 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 211 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 212 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 213 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 214 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 215 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation', 'locatedExpr', 'withClass', 216 'CloseMatch', 'tokenMap', 'pyparsing_common', 'pyparsing_unicode', 'unicode_set', 217 'conditionAsParseAction', 're', 218 ] 219 220system_version = tuple(sys.version_info)[:3] 221PY_3 = system_version[0] == 3 222if PY_3: 223 _MAX_INT = sys.maxsize 224 basestring = str 225 unichr = chr 226 unicode = str 227 _ustr = str 228 229 # build list of single arg builtins, that can be used as parse actions 230 singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 231 232else: 233 _MAX_INT = sys.maxint 234 range = xrange 235 236 def _ustr(obj): 237 """Drop-in replacement for str(obj) that tries to be Unicode 238 friendly. It first tries str(obj). If that fails with 239 a UnicodeEncodeError, then it tries unicode(obj). It then 240 < returns the unicode object | encodes it with the default 241 encoding | ... >. 242 """ 243 if isinstance(obj, unicode): 244 return obj 245 246 try: 247 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 248 # it won't break any existing code. 249 return str(obj) 250 251 except UnicodeEncodeError: 252 # Else encode it 253 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') 254 xmlcharref = Regex(r'&#\d+;') 255 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) 256 return xmlcharref.transformString(ret) 257 258 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 259 singleArgBuiltins = [] 260 import __builtin__ 261 262 for fname in "sum len sorted reversed list tuple set any all min max".split(): 263 try: 264 singleArgBuiltins.append(getattr(__builtin__, fname)) 265 except AttributeError: 266 continue 267 268_generatorType = type((y for y in range(1))) 269 270def _xml_escape(data): 271 """Escape &, <, >, ", ', etc. in a string of data.""" 272 273 # ampersand must be replaced first 274 from_symbols = '&><"\'' 275 to_symbols = ('&' + s + ';' for s in "amp gt lt quot apos".split()) 276 for from_, to_ in zip(from_symbols, to_symbols): 277 data = data.replace(from_, to_) 278 return data 279 280alphas = string.ascii_uppercase + string.ascii_lowercase 281nums = "0123456789" 282hexnums = nums + "ABCDEFabcdef" 283alphanums = alphas + nums 284_bslash = chr(92) 285printables = "".join(c for c in string.printable if c not in string.whitespace) 286 287 288def conditionAsParseAction(fn, message=None, fatal=False): 289 msg = message if message is not None else "failed user-defined condition" 290 exc_type = ParseFatalException if fatal else ParseException 291 fn = _trim_arity(fn) 292 293 @wraps(fn) 294 def pa(s, l, t): 295 if not bool(fn(s, l, t)): 296 raise exc_type(s, l, msg) 297 298 return pa 299 300class ParseBaseException(Exception): 301 """base exception class for all parsing runtime exceptions""" 302 # Performance tuning: we construct a *lot* of these, so keep this 303 # constructor as small and fast as possible 304 def __init__(self, pstr, loc=0, msg=None, elem=None): 305 self.loc = loc 306 if msg is None: 307 self.msg = pstr 308 self.pstr = "" 309 else: 310 self.msg = msg 311 self.pstr = pstr 312 self.parserElement = elem 313 self.args = (pstr, loc, msg) 314 315 @classmethod 316 def _from_exception(cls, pe): 317 """ 318 internal factory method to simplify creating one type of ParseException 319 from another - avoids having __init__ signature conflicts among subclasses 320 """ 321 return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement) 322 323 def __getattr__(self, aname): 324 """supported attributes by name are: 325 - lineno - returns the line number of the exception text 326 - col - returns the column number of the exception text 327 - line - returns the line containing the exception text 328 """ 329 if aname == "lineno": 330 return lineno(self.loc, self.pstr) 331 elif aname in ("col", "column"): 332 return col(self.loc, self.pstr) 333 elif aname == "line": 334 return line(self.loc, self.pstr) 335 else: 336 raise AttributeError(aname) 337 338 def __str__(self): 339 if self.pstr: 340 if self.loc >= len(self.pstr): 341 foundstr = ', found end of text' 342 else: 343 foundstr = (', found %r' % self.pstr[self.loc:self.loc + 1]).replace(r'\\', '\\') 344 else: 345 foundstr = '' 346 return ("%s%s (at char %d), (line:%d, col:%d)" % 347 (self.msg, foundstr, self.loc, self.lineno, self.column)) 348 def __repr__(self): 349 return _ustr(self) 350 def markInputline(self, markerString=">!<"): 351 """Extracts the exception line from the input string, and marks 352 the location of the exception with a special symbol. 353 """ 354 line_str = self.line 355 line_column = self.column - 1 356 if markerString: 357 line_str = "".join((line_str[:line_column], 358 markerString, line_str[line_column:])) 359 return line_str.strip() 360 def __dir__(self): 361 return "lineno col line".split() + dir(type(self)) 362 363class ParseException(ParseBaseException): 364 """ 365 Exception thrown when parse expressions don't match class; 366 supported attributes by name are: 367 - lineno - returns the line number of the exception text 368 - col - returns the column number of the exception text 369 - line - returns the line containing the exception text 370 371 Example:: 372 373 try: 374 Word(nums).setName("integer").parseString("ABC") 375 except ParseException as pe: 376 print(pe) 377 print("column: {}".format(pe.col)) 378 379 prints:: 380 381 Expected integer (at char 0), (line:1, col:1) 382 column: 1 383 384 """ 385 386 @staticmethod 387 def explain(exc, depth=16): 388 """ 389 Method to take an exception and translate the Python internal traceback into a list 390 of the pyparsing expressions that caused the exception to be raised. 391 392 Parameters: 393 394 - exc - exception raised during parsing (need not be a ParseException, in support 395 of Python exceptions that might be raised in a parse action) 396 - depth (default=16) - number of levels back in the stack trace to list expression 397 and function names; if None, the full stack trace names will be listed; if 0, only 398 the failing input line, marker, and exception string will be shown 399 400 Returns a multi-line string listing the ParserElements and/or function names in the 401 exception's stack trace. 402 403 Note: the diagnostic output will include string representations of the expressions 404 that failed to parse. These representations will be more helpful if you use `setName` to 405 give identifiable names to your expressions. Otherwise they will use the default string 406 forms, which may be cryptic to read. 407 408 explain() is only supported under Python 3. 409 """ 410 import inspect 411 412 if depth is None: 413 depth = sys.getrecursionlimit() 414 ret = [] 415 if isinstance(exc, ParseBaseException): 416 ret.append(exc.line) 417 ret.append(' ' * (exc.col - 1) + '^') 418 ret.append("{0}: {1}".format(type(exc).__name__, exc)) 419 420 if depth > 0: 421 callers = inspect.getinnerframes(exc.__traceback__, context=depth) 422 seen = set() 423 for i, ff in enumerate(callers[-depth:]): 424 frm = ff[0] 425 426 f_self = frm.f_locals.get('self', None) 427 if isinstance(f_self, ParserElement): 428 if frm.f_code.co_name not in ('parseImpl', '_parseNoCache'): 429 continue 430 if f_self in seen: 431 continue 432 seen.add(f_self) 433 434 self_type = type(f_self) 435 ret.append("{0}.{1} - {2}".format(self_type.__module__, 436 self_type.__name__, 437 f_self)) 438 elif f_self is not None: 439 self_type = type(f_self) 440 ret.append("{0}.{1}".format(self_type.__module__, 441 self_type.__name__)) 442 else: 443 code = frm.f_code 444 if code.co_name in ('wrapper', '<module>'): 445 continue 446 447 ret.append("{0}".format(code.co_name)) 448 449 depth -= 1 450 if not depth: 451 break 452 453 return '\n'.join(ret) 454 455 456class ParseFatalException(ParseBaseException): 457 """user-throwable exception thrown when inconsistent parse content 458 is found; stops all parsing immediately""" 459 pass 460 461class ParseSyntaxException(ParseFatalException): 462 """just like :class:`ParseFatalException`, but thrown internally 463 when an :class:`ErrorStop<And._ErrorStop>` ('-' operator) indicates 464 that parsing is to stop immediately because an unbacktrackable 465 syntax error has been found. 466 """ 467 pass 468 469#~ class ReparseException(ParseBaseException): 470 #~ """Experimental class - parse actions can raise this exception to cause 471 #~ pyparsing to reparse the input string: 472 #~ - with a modified input string, and/or 473 #~ - with a modified start location 474 #~ Set the values of the ReparseException in the constructor, and raise the 475 #~ exception in a parse action to cause pyparsing to use the new string/location. 476 #~ Setting the values as None causes no change to be made. 477 #~ """ 478 #~ def __init_( self, newstring, restartLoc ): 479 #~ self.newParseText = newstring 480 #~ self.reparseLoc = restartLoc 481 482class RecursiveGrammarException(Exception): 483 """exception thrown by :class:`ParserElement.validate` if the 484 grammar could be improperly recursive 485 """ 486 def __init__(self, parseElementList): 487 self.parseElementTrace = parseElementList 488 489 def __str__(self): 490 return "RecursiveGrammarException: %s" % self.parseElementTrace 491 492class _ParseResultsWithOffset(object): 493 def __init__(self, p1, p2): 494 self.tup = (p1, p2) 495 def __getitem__(self, i): 496 return self.tup[i] 497 def __repr__(self): 498 return repr(self.tup[0]) 499 def setOffset(self, i): 500 self.tup = (self.tup[0], i) 501 502class ParseResults(object): 503 """Structured parse results, to provide multiple means of access to 504 the parsed data: 505 506 - as a list (``len(results)``) 507 - by list index (``results[0], results[1]``, etc.) 508 - by attribute (``results.<resultsName>`` - see :class:`ParserElement.setResultsName`) 509 510 Example:: 511 512 integer = Word(nums) 513 date_str = (integer.setResultsName("year") + '/' 514 + integer.setResultsName("month") + '/' 515 + integer.setResultsName("day")) 516 # equivalent form: 517 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 518 519 # parseString returns a ParseResults object 520 result = date_str.parseString("1999/12/31") 521 522 def test(s, fn=repr): 523 print("%s -> %s" % (s, fn(eval(s)))) 524 test("list(result)") 525 test("result[0]") 526 test("result['month']") 527 test("result.day") 528 test("'month' in result") 529 test("'minutes' in result") 530 test("result.dump()", str) 531 532 prints:: 533 534 list(result) -> ['1999', '/', '12', '/', '31'] 535 result[0] -> '1999' 536 result['month'] -> '12' 537 result.day -> '31' 538 'month' in result -> True 539 'minutes' in result -> False 540 result.dump() -> ['1999', '/', '12', '/', '31'] 541 - day: 31 542 - month: 12 543 - year: 1999 544 """ 545 def __new__(cls, toklist=None, name=None, asList=True, modal=True): 546 if isinstance(toklist, cls): 547 return toklist 548 retobj = object.__new__(cls) 549 retobj.__doinit = True 550 return retobj 551 552 # Performance tuning: we construct a *lot* of these, so keep this 553 # constructor as small and fast as possible 554 def __init__(self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance): 555 if self.__doinit: 556 self.__doinit = False 557 self.__name = None 558 self.__parent = None 559 self.__accumNames = {} 560 self.__asList = asList 561 self.__modal = modal 562 if toklist is None: 563 toklist = [] 564 if isinstance(toklist, list): 565 self.__toklist = toklist[:] 566 elif isinstance(toklist, _generatorType): 567 self.__toklist = list(toklist) 568 else: 569 self.__toklist = [toklist] 570 self.__tokdict = dict() 571 572 if name is not None and name: 573 if not modal: 574 self.__accumNames[name] = 0 575 if isinstance(name, int): 576 name = _ustr(name) # will always return a str, but use _ustr for consistency 577 self.__name = name 578 if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None, '', [])): 579 if isinstance(toklist, basestring): 580 toklist = [toklist] 581 if asList: 582 if isinstance(toklist, ParseResults): 583 self[name] = _ParseResultsWithOffset(ParseResults(toklist.__toklist), 0) 584 else: 585 self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0) 586 self[name].__name = name 587 else: 588 try: 589 self[name] = toklist[0] 590 except (KeyError, TypeError, IndexError): 591 self[name] = toklist 592 593 def __getitem__(self, i): 594 if isinstance(i, (int, slice)): 595 return self.__toklist[i] 596 else: 597 if i not in self.__accumNames: 598 return self.__tokdict[i][-1][0] 599 else: 600 return ParseResults([v[0] for v in self.__tokdict[i]]) 601 602 def __setitem__(self, k, v, isinstance=isinstance): 603 if isinstance(v, _ParseResultsWithOffset): 604 self.__tokdict[k] = self.__tokdict.get(k, list()) + [v] 605 sub = v[0] 606 elif isinstance(k, (int, slice)): 607 self.__toklist[k] = v 608 sub = v 609 else: 610 self.__tokdict[k] = self.__tokdict.get(k, list()) + [_ParseResultsWithOffset(v, 0)] 611 sub = v 612 if isinstance(sub, ParseResults): 613 sub.__parent = wkref(self) 614 615 def __delitem__(self, i): 616 if isinstance(i, (int, slice)): 617 mylen = len(self.__toklist) 618 del self.__toklist[i] 619 620 # convert int to slice 621 if isinstance(i, int): 622 if i < 0: 623 i += mylen 624 i = slice(i, i + 1) 625 # get removed indices 626 removed = list(range(*i.indices(mylen))) 627 removed.reverse() 628 # fixup indices in token dictionary 629 for name, occurrences in self.__tokdict.items(): 630 for j in removed: 631 for k, (value, position) in enumerate(occurrences): 632 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 633 else: 634 del self.__tokdict[i] 635 636 def __contains__(self, k): 637 return k in self.__tokdict 638 639 def __len__(self): 640 return len(self.__toklist) 641 642 def __bool__(self): 643 return (not not self.__toklist) 644 __nonzero__ = __bool__ 645 646 def __iter__(self): 647 return iter(self.__toklist) 648 649 def __reversed__(self): 650 return iter(self.__toklist[::-1]) 651 652 def _iterkeys(self): 653 if hasattr(self.__tokdict, "iterkeys"): 654 return self.__tokdict.iterkeys() 655 else: 656 return iter(self.__tokdict) 657 658 def _itervalues(self): 659 return (self[k] for k in self._iterkeys()) 660 661 def _iteritems(self): 662 return ((k, self[k]) for k in self._iterkeys()) 663 664 if PY_3: 665 keys = _iterkeys 666 """Returns an iterator of all named result keys.""" 667 668 values = _itervalues 669 """Returns an iterator of all named result values.""" 670 671 items = _iteritems 672 """Returns an iterator of all named result key-value tuples.""" 673 674 else: 675 iterkeys = _iterkeys 676 """Returns an iterator of all named result keys (Python 2.x only).""" 677 678 itervalues = _itervalues 679 """Returns an iterator of all named result values (Python 2.x only).""" 680 681 iteritems = _iteritems 682 """Returns an iterator of all named result key-value tuples (Python 2.x only).""" 683 684 def keys(self): 685 """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x).""" 686 return list(self.iterkeys()) 687 688 def values(self): 689 """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x).""" 690 return list(self.itervalues()) 691 692 def items(self): 693 """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x).""" 694 return list(self.iteritems()) 695 696 def haskeys(self): 697 """Since keys() returns an iterator, this method is helpful in bypassing 698 code that looks for the existence of any defined results names.""" 699 return bool(self.__tokdict) 700 701 def pop(self, *args, **kwargs): 702 """ 703 Removes and returns item at specified index (default= ``last``). 704 Supports both ``list`` and ``dict`` semantics for ``pop()``. If 705 passed no argument or an integer argument, it will use ``list`` 706 semantics and pop tokens from the list of parsed tokens. If passed 707 a non-integer argument (most likely a string), it will use ``dict`` 708 semantics and pop the corresponding value from any defined results 709 names. A second default return value argument is supported, just as in 710 ``dict.pop()``. 711 712 Example:: 713 714 def remove_first(tokens): 715 tokens.pop(0) 716 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 717 print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321'] 718 719 label = Word(alphas) 720 patt = label("LABEL") + OneOrMore(Word(nums)) 721 print(patt.parseString("AAB 123 321").dump()) 722 723 # Use pop() in a parse action to remove named result (note that corresponding value is not 724 # removed from list form of results) 725 def remove_LABEL(tokens): 726 tokens.pop("LABEL") 727 return tokens 728 patt.addParseAction(remove_LABEL) 729 print(patt.parseString("AAB 123 321").dump()) 730 731 prints:: 732 733 ['AAB', '123', '321'] 734 - LABEL: AAB 735 736 ['AAB', '123', '321'] 737 """ 738 if not args: 739 args = [-1] 740 for k, v in kwargs.items(): 741 if k == 'default': 742 args = (args[0], v) 743 else: 744 raise TypeError("pop() got an unexpected keyword argument '%s'" % k) 745 if (isinstance(args[0], int) 746 or len(args) == 1 747 or args[0] in self): 748 index = args[0] 749 ret = self[index] 750 del self[index] 751 return ret 752 else: 753 defaultvalue = args[1] 754 return defaultvalue 755 756 def get(self, key, defaultValue=None): 757 """ 758 Returns named result matching the given key, or if there is no 759 such name, then returns the given ``defaultValue`` or ``None`` if no 760 ``defaultValue`` is specified. 761 762 Similar to ``dict.get()``. 763 764 Example:: 765 766 integer = Word(nums) 767 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 768 769 result = date_str.parseString("1999/12/31") 770 print(result.get("year")) # -> '1999' 771 print(result.get("hour", "not specified")) # -> 'not specified' 772 print(result.get("hour")) # -> None 773 """ 774 if key in self: 775 return self[key] 776 else: 777 return defaultValue 778 779 def insert(self, index, insStr): 780 """ 781 Inserts new element at location index in the list of parsed tokens. 782 783 Similar to ``list.insert()``. 784 785 Example:: 786 787 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 788 789 # use a parse action to insert the parse location in the front of the parsed results 790 def insert_locn(locn, tokens): 791 tokens.insert(0, locn) 792 print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321'] 793 """ 794 self.__toklist.insert(index, insStr) 795 # fixup indices in token dictionary 796 for name, occurrences in self.__tokdict.items(): 797 for k, (value, position) in enumerate(occurrences): 798 occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) 799 800 def append(self, item): 801 """ 802 Add single element to end of ParseResults list of elements. 803 804 Example:: 805 806 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 807 808 # use a parse action to compute the sum of the parsed integers, and add it to the end 809 def append_sum(tokens): 810 tokens.append(sum(map(int, tokens))) 811 print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444] 812 """ 813 self.__toklist.append(item) 814 815 def extend(self, itemseq): 816 """ 817 Add sequence of elements to end of ParseResults list of elements. 818 819 Example:: 820 821 patt = OneOrMore(Word(alphas)) 822 823 # use a parse action to append the reverse of the matched strings, to make a palindrome 824 def make_palindrome(tokens): 825 tokens.extend(reversed([t[::-1] for t in tokens])) 826 return ''.join(tokens) 827 print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' 828 """ 829 if isinstance(itemseq, ParseResults): 830 self.__iadd__(itemseq) 831 else: 832 self.__toklist.extend(itemseq) 833 834 def clear(self): 835 """ 836 Clear all elements and results names. 837 """ 838 del self.__toklist[:] 839 self.__tokdict.clear() 840 841 def __getattr__(self, name): 842 try: 843 return self[name] 844 except KeyError: 845 return "" 846 847 def __add__(self, other): 848 ret = self.copy() 849 ret += other 850 return ret 851 852 def __iadd__(self, other): 853 if other.__tokdict: 854 offset = len(self.__toklist) 855 addoffset = lambda a: offset if a < 0 else a + offset 856 otheritems = other.__tokdict.items() 857 otherdictitems = [(k, _ParseResultsWithOffset(v[0], addoffset(v[1]))) 858 for k, vlist in otheritems for v in vlist] 859 for k, v in otherdictitems: 860 self[k] = v 861 if isinstance(v[0], ParseResults): 862 v[0].__parent = wkref(self) 863 864 self.__toklist += other.__toklist 865 self.__accumNames.update(other.__accumNames) 866 return self 867 868 def __radd__(self, other): 869 if isinstance(other, int) and other == 0: 870 # useful for merging many ParseResults using sum() builtin 871 return self.copy() 872 else: 873 # this may raise a TypeError - so be it 874 return other + self 875 876 def __repr__(self): 877 return "(%s, %s)" % (repr(self.__toklist), repr(self.__tokdict)) 878 879 def __str__(self): 880 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']' 881 882 def _asStringList(self, sep=''): 883 out = [] 884 for item in self.__toklist: 885 if out and sep: 886 out.append(sep) 887 if isinstance(item, ParseResults): 888 out += item._asStringList() 889 else: 890 out.append(_ustr(item)) 891 return out 892 893 def asList(self): 894 """ 895 Returns the parse results as a nested list of matching tokens, all converted to strings. 896 897 Example:: 898 899 patt = OneOrMore(Word(alphas)) 900 result = patt.parseString("sldkj lsdkj sldkj") 901 # even though the result prints in string-like form, it is actually a pyparsing ParseResults 902 print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj'] 903 904 # Use asList() to create an actual list 905 result_list = result.asList() 906 print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj'] 907 """ 908 return [res.asList() if isinstance(res, ParseResults) else res for res in self.__toklist] 909 910 def asDict(self): 911 """ 912 Returns the named parse results as a nested dictionary. 913 914 Example:: 915 916 integer = Word(nums) 917 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 918 919 result = date_str.parseString('12/31/1999') 920 print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]}) 921 922 result_dict = result.asDict() 923 print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'} 924 925 # even though a ParseResults supports dict-like access, sometime you just need to have a dict 926 import json 927 print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable 928 print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"} 929 """ 930 if PY_3: 931 item_fn = self.items 932 else: 933 item_fn = self.iteritems 934 935 def toItem(obj): 936 if isinstance(obj, ParseResults): 937 if obj.haskeys(): 938 return obj.asDict() 939 else: 940 return [toItem(v) for v in obj] 941 else: 942 return obj 943 944 return dict((k, toItem(v)) for k, v in item_fn()) 945 946 def copy(self): 947 """ 948 Returns a new copy of a :class:`ParseResults` object. 949 """ 950 ret = ParseResults(self.__toklist) 951 ret.__tokdict = dict(self.__tokdict.items()) 952 ret.__parent = self.__parent 953 ret.__accumNames.update(self.__accumNames) 954 ret.__name = self.__name 955 return ret 956 957 def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True): 958 """ 959 (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names. 960 """ 961 nl = "\n" 962 out = [] 963 namedItems = dict((v[1], k) for (k, vlist) in self.__tokdict.items() 964 for v in vlist) 965 nextLevelIndent = indent + " " 966 967 # collapse out indents if formatting is not desired 968 if not formatted: 969 indent = "" 970 nextLevelIndent = "" 971 nl = "" 972 973 selfTag = None 974 if doctag is not None: 975 selfTag = doctag 976 else: 977 if self.__name: 978 selfTag = self.__name 979 980 if not selfTag: 981 if namedItemsOnly: 982 return "" 983 else: 984 selfTag = "ITEM" 985 986 out += [nl, indent, "<", selfTag, ">"] 987 988 for i, res in enumerate(self.__toklist): 989 if isinstance(res, ParseResults): 990 if i in namedItems: 991 out += [res.asXML(namedItems[i], 992 namedItemsOnly and doctag is None, 993 nextLevelIndent, 994 formatted)] 995 else: 996 out += [res.asXML(None, 997 namedItemsOnly and doctag is None, 998 nextLevelIndent, 999 formatted)] 1000 else: 1001 # individual token, see if there is a name for it 1002 resTag = None 1003 if i in namedItems: 1004 resTag = namedItems[i] 1005 if not resTag: 1006 if namedItemsOnly: 1007 continue 1008 else: 1009 resTag = "ITEM" 1010 xmlBodyText = _xml_escape(_ustr(res)) 1011 out += [nl, nextLevelIndent, "<", resTag, ">", 1012 xmlBodyText, 1013 "</", resTag, ">"] 1014 1015 out += [nl, indent, "</", selfTag, ">"] 1016 return "".join(out) 1017 1018 def __lookup(self, sub): 1019 for k, vlist in self.__tokdict.items(): 1020 for v, loc in vlist: 1021 if sub is v: 1022 return k 1023 return None 1024 1025 def getName(self): 1026 r""" 1027 Returns the results name for this token expression. Useful when several 1028 different expressions might match at a particular location. 1029 1030 Example:: 1031 1032 integer = Word(nums) 1033 ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d") 1034 house_number_expr = Suppress('#') + Word(nums, alphanums) 1035 user_data = (Group(house_number_expr)("house_number") 1036 | Group(ssn_expr)("ssn") 1037 | Group(integer)("age")) 1038 user_info = OneOrMore(user_data) 1039 1040 result = user_info.parseString("22 111-22-3333 #221B") 1041 for item in result: 1042 print(item.getName(), ':', item[0]) 1043 1044 prints:: 1045 1046 age : 22 1047 ssn : 111-22-3333 1048 house_number : 221B 1049 """ 1050 if self.__name: 1051 return self.__name 1052 elif self.__parent: 1053 par = self.__parent() 1054 if par: 1055 return par.__lookup(self) 1056 else: 1057 return None 1058 elif (len(self) == 1 1059 and len(self.__tokdict) == 1 1060 and next(iter(self.__tokdict.values()))[0][1] in (0, -1)): 1061 return next(iter(self.__tokdict.keys())) 1062 else: 1063 return None 1064 1065 def dump(self, indent='', full=True, include_list=True, _depth=0): 1066 """ 1067 Diagnostic method for listing out the contents of 1068 a :class:`ParseResults`. Accepts an optional ``indent`` argument so 1069 that this string can be embedded in a nested display of other data. 1070 1071 Example:: 1072 1073 integer = Word(nums) 1074 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 1075 1076 result = date_str.parseString('12/31/1999') 1077 print(result.dump()) 1078 1079 prints:: 1080 1081 ['12', '/', '31', '/', '1999'] 1082 - day: 1999 1083 - month: 31 1084 - year: 12 1085 """ 1086 out = [] 1087 NL = '\n' 1088 if include_list: 1089 out.append(indent + _ustr(self.asList())) 1090 else: 1091 out.append('') 1092 1093 if full: 1094 if self.haskeys(): 1095 items = sorted((str(k), v) for k, v in self.items()) 1096 for k, v in items: 1097 if out: 1098 out.append(NL) 1099 out.append("%s%s- %s: " % (indent, (' ' * _depth), k)) 1100 if isinstance(v, ParseResults): 1101 if v: 1102 out.append(v.dump(indent=indent, full=full, include_list=include_list, _depth=_depth + 1)) 1103 else: 1104 out.append(_ustr(v)) 1105 else: 1106 out.append(repr(v)) 1107 elif any(isinstance(vv, ParseResults) for vv in self): 1108 v = self 1109 for i, vv in enumerate(v): 1110 if isinstance(vv, ParseResults): 1111 out.append("\n%s%s[%d]:\n%s%s%s" % (indent, 1112 (' ' * (_depth)), 1113 i, 1114 indent, 1115 (' ' * (_depth + 1)), 1116 vv.dump(indent=indent, 1117 full=full, 1118 include_list=include_list, 1119 _depth=_depth + 1))) 1120 else: 1121 out.append("\n%s%s[%d]:\n%s%s%s" % (indent, 1122 (' ' * (_depth)), 1123 i, 1124 indent, 1125 (' ' * (_depth + 1)), 1126 _ustr(vv))) 1127 1128 return "".join(out) 1129 1130 def pprint(self, *args, **kwargs): 1131 """ 1132 Pretty-printer for parsed results as a list, using the 1133 `pprint <https://docs.python.org/3/library/pprint.html>`_ module. 1134 Accepts additional positional or keyword args as defined for 1135 `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ . 1136 1137 Example:: 1138 1139 ident = Word(alphas, alphanums) 1140 num = Word(nums) 1141 func = Forward() 1142 term = ident | num | Group('(' + func + ')') 1143 func <<= ident + Group(Optional(delimitedList(term))) 1144 result = func.parseString("fna a,b,(fnb c,d,200),100") 1145 result.pprint(width=40) 1146 1147 prints:: 1148 1149 ['fna', 1150 ['a', 1151 'b', 1152 ['(', 'fnb', ['c', 'd', '200'], ')'], 1153 '100']] 1154 """ 1155 pprint.pprint(self.asList(), *args, **kwargs) 1156 1157 # add support for pickle protocol 1158 def __getstate__(self): 1159 return (self.__toklist, 1160 (self.__tokdict.copy(), 1161 self.__parent is not None and self.__parent() or None, 1162 self.__accumNames, 1163 self.__name)) 1164 1165 def __setstate__(self, state): 1166 self.__toklist = state[0] 1167 self.__tokdict, par, inAccumNames, self.__name = state[1] 1168 self.__accumNames = {} 1169 self.__accumNames.update(inAccumNames) 1170 if par is not None: 1171 self.__parent = wkref(par) 1172 else: 1173 self.__parent = None 1174 1175 def __getnewargs__(self): 1176 return self.__toklist, self.__name, self.__asList, self.__modal 1177 1178 def __dir__(self): 1179 return dir(type(self)) + list(self.keys()) 1180 1181 @classmethod 1182 def from_dict(cls, other, name=None): 1183 """ 1184 Helper classmethod to construct a ParseResults from a dict, preserving the 1185 name-value relations as results names. If an optional 'name' argument is 1186 given, a nested ParseResults will be returned 1187 """ 1188 def is_iterable(obj): 1189 try: 1190 iter(obj) 1191 except Exception: 1192 return False 1193 else: 1194 if PY_3: 1195 return not isinstance(obj, (str, bytes)) 1196 else: 1197 return not isinstance(obj, basestring) 1198 1199 ret = cls([]) 1200 for k, v in other.items(): 1201 if isinstance(v, Mapping): 1202 ret += cls.from_dict(v, name=k) 1203 else: 1204 ret += cls([v], name=k, asList=is_iterable(v)) 1205 if name is not None: 1206 ret = cls([ret], name=name) 1207 return ret 1208 1209MutableMapping.register(ParseResults) 1210 1211def col (loc, strg): 1212 """Returns current column within a string, counting newlines as line separators. 1213 The first column is number 1. 1214 1215 Note: the default parsing behavior is to expand tabs in the input string 1216 before starting the parsing process. See 1217 :class:`ParserElement.parseString` for more 1218 information on parsing strings containing ``<TAB>`` s, and suggested 1219 methods to maintain a consistent view of the parsed string, the parse 1220 location, and line and column positions within the parsed string. 1221 """ 1222 s = strg 1223 return 1 if 0 < loc < len(s) and s[loc-1] == '\n' else loc - s.rfind("\n", 0, loc) 1224 1225def lineno(loc, strg): 1226 """Returns current line number within a string, counting newlines as line separators. 1227 The first line is number 1. 1228 1229 Note - the default parsing behavior is to expand tabs in the input string 1230 before starting the parsing process. See :class:`ParserElement.parseString` 1231 for more information on parsing strings containing ``<TAB>`` s, and 1232 suggested methods to maintain a consistent view of the parsed string, the 1233 parse location, and line and column positions within the parsed string. 1234 """ 1235 return strg.count("\n", 0, loc) + 1 1236 1237def line(loc, strg): 1238 """Returns the line of text containing loc within a string, counting newlines as line separators. 1239 """ 1240 lastCR = strg.rfind("\n", 0, loc) 1241 nextCR = strg.find("\n", loc) 1242 if nextCR >= 0: 1243 return strg[lastCR + 1:nextCR] 1244 else: 1245 return strg[lastCR + 1:] 1246 1247def _defaultStartDebugAction(instring, loc, expr): 1248 print(("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % (lineno(loc, instring), col(loc, instring)))) 1249 1250def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks): 1251 print("Matched " + _ustr(expr) + " -> " + str(toks.asList())) 1252 1253def _defaultExceptionDebugAction(instring, loc, expr, exc): 1254 print("Exception raised:" + _ustr(exc)) 1255 1256def nullDebugAction(*args): 1257 """'Do-nothing' debug action, to suppress debugging output during parsing.""" 1258 pass 1259 1260# Only works on Python 3.x - nonlocal is toxic to Python 2 installs 1261#~ 'decorator to trim function calls to match the arity of the target' 1262#~ def _trim_arity(func, maxargs=3): 1263 #~ if func in singleArgBuiltins: 1264 #~ return lambda s,l,t: func(t) 1265 #~ limit = 0 1266 #~ foundArity = False 1267 #~ def wrapper(*args): 1268 #~ nonlocal limit,foundArity 1269 #~ while 1: 1270 #~ try: 1271 #~ ret = func(*args[limit:]) 1272 #~ foundArity = True 1273 #~ return ret 1274 #~ except TypeError: 1275 #~ if limit == maxargs or foundArity: 1276 #~ raise 1277 #~ limit += 1 1278 #~ continue 1279 #~ return wrapper 1280 1281# this version is Python 2.x-3.x cross-compatible 1282'decorator to trim function calls to match the arity of the target' 1283def _trim_arity(func, maxargs=2): 1284 if func in singleArgBuiltins: 1285 return lambda s, l, t: func(t) 1286 limit = [0] 1287 foundArity = [False] 1288 1289 # traceback return data structure changed in Py3.5 - normalize back to plain tuples 1290 if system_version[:2] >= (3, 5): 1291 def extract_stack(limit=0): 1292 # special handling for Python 3.5.0 - extra deep call stack by 1 1293 offset = -3 if system_version == (3, 5, 0) else -2 1294 frame_summary = traceback.extract_stack(limit=-offset + limit - 1)[offset] 1295 return [frame_summary[:2]] 1296 def extract_tb(tb, limit=0): 1297 frames = traceback.extract_tb(tb, limit=limit) 1298 frame_summary = frames[-1] 1299 return [frame_summary[:2]] 1300 else: 1301 extract_stack = traceback.extract_stack 1302 extract_tb = traceback.extract_tb 1303 1304 # synthesize what would be returned by traceback.extract_stack at the call to 1305 # user's parse action 'func', so that we don't incur call penalty at parse time 1306 1307 LINE_DIFF = 6 1308 # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND 1309 # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!! 1310 this_line = extract_stack(limit=2)[-1] 1311 pa_call_line_synth = (this_line[0], this_line[1] + LINE_DIFF) 1312 1313 def wrapper(*args): 1314 while 1: 1315 try: 1316 ret = func(*args[limit[0]:]) 1317 foundArity[0] = True 1318 return ret 1319 except TypeError: 1320 # re-raise TypeErrors if they did not come from our arity testing 1321 if foundArity[0]: 1322 raise 1323 else: 1324 try: 1325 tb = sys.exc_info()[-1] 1326 if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth: 1327 raise 1328 finally: 1329 try: 1330 del tb 1331 except NameError: 1332 pass 1333 1334 if limit[0] <= maxargs: 1335 limit[0] += 1 1336 continue 1337 raise 1338 1339 # copy func name to wrapper for sensible debug output 1340 func_name = "<parse action>" 1341 try: 1342 func_name = getattr(func, '__name__', 1343 getattr(func, '__class__').__name__) 1344 except Exception: 1345 func_name = str(func) 1346 wrapper.__name__ = func_name 1347 1348 return wrapper 1349 1350 1351class ParserElement(object): 1352 """Abstract base level parser element class.""" 1353 DEFAULT_WHITE_CHARS = " \n\t\r" 1354 verbose_stacktrace = False 1355 1356 @staticmethod 1357 def setDefaultWhitespaceChars(chars): 1358 r""" 1359 Overrides the default whitespace chars 1360 1361 Example:: 1362 1363 # default whitespace chars are space, <TAB> and newline 1364 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl'] 1365 1366 # change to just treat newline as significant 1367 ParserElement.setDefaultWhitespaceChars(" \t") 1368 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def'] 1369 """ 1370 ParserElement.DEFAULT_WHITE_CHARS = chars 1371 1372 @staticmethod 1373 def inlineLiteralsUsing(cls): 1374 """ 1375 Set class to be used for inclusion of string literals into a parser. 1376 1377 Example:: 1378 1379 # default literal class used is Literal 1380 integer = Word(nums) 1381 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 1382 1383 date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31'] 1384 1385 1386 # change to Suppress 1387 ParserElement.inlineLiteralsUsing(Suppress) 1388 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 1389 1390 date_str.parseString("1999/12/31") # -> ['1999', '12', '31'] 1391 """ 1392 ParserElement._literalStringClass = cls 1393 1394 @classmethod 1395 def _trim_traceback(cls, tb): 1396 while tb.tb_next: 1397 tb = tb.tb_next 1398 return tb 1399 1400 def __init__(self, savelist=False): 1401 self.parseAction = list() 1402 self.failAction = None 1403 # ~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall 1404 self.strRepr = None 1405 self.resultsName = None 1406 self.saveAsList = savelist 1407 self.skipWhitespace = True 1408 self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) 1409 self.copyDefaultWhiteChars = True 1410 self.mayReturnEmpty = False # used when checking for left-recursion 1411 self.keepTabs = False 1412 self.ignoreExprs = list() 1413 self.debug = False 1414 self.streamlined = False 1415 self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index 1416 self.errmsg = "" 1417 self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) 1418 self.debugActions = (None, None, None) # custom debug actions 1419 self.re = None 1420 self.callPreparse = True # used to avoid redundant calls to preParse 1421 self.callDuringTry = False 1422 1423 def copy(self): 1424 """ 1425 Make a copy of this :class:`ParserElement`. Useful for defining 1426 different parse actions for the same parsing pattern, using copies of 1427 the original parse element. 1428 1429 Example:: 1430 1431 integer = Word(nums).setParseAction(lambda toks: int(toks[0])) 1432 integerK = integer.copy().addParseAction(lambda toks: toks[0] * 1024) + Suppress("K") 1433 integerM = integer.copy().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M") 1434 1435 print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M")) 1436 1437 prints:: 1438 1439 [5120, 100, 655360, 268435456] 1440 1441 Equivalent form of ``expr.copy()`` is just ``expr()``:: 1442 1443 integerM = integer().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M") 1444 """ 1445 cpy = copy.copy(self) 1446 cpy.parseAction = self.parseAction[:] 1447 cpy.ignoreExprs = self.ignoreExprs[:] 1448 if self.copyDefaultWhiteChars: 1449 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 1450 return cpy 1451 1452 def setName(self, name): 1453 """ 1454 Define name for this expression, makes debugging and exception messages clearer. 1455 1456 Example:: 1457 1458 Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1) 1459 Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1) 1460 """ 1461 self.name = name 1462 self.errmsg = "Expected " + self.name 1463 if __diag__.enable_debug_on_named_expressions: 1464 self.setDebug() 1465 return self 1466 1467 def setResultsName(self, name, listAllMatches=False): 1468 """ 1469 Define name for referencing matching tokens as a nested attribute 1470 of the returned parse results. 1471 NOTE: this returns a *copy* of the original :class:`ParserElement` object; 1472 this is so that the client can define a basic element, such as an 1473 integer, and reference it in multiple places with different names. 1474 1475 You can also set results names using the abbreviated syntax, 1476 ``expr("name")`` in place of ``expr.setResultsName("name")`` 1477 - see :class:`__call__`. 1478 1479 Example:: 1480 1481 date_str = (integer.setResultsName("year") + '/' 1482 + integer.setResultsName("month") + '/' 1483 + integer.setResultsName("day")) 1484 1485 # equivalent form: 1486 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 1487 """ 1488 return self._setResultsName(name, listAllMatches) 1489 1490 def _setResultsName(self, name, listAllMatches=False): 1491 newself = self.copy() 1492 if name.endswith("*"): 1493 name = name[:-1] 1494 listAllMatches = True 1495 newself.resultsName = name 1496 newself.modalResults = not listAllMatches 1497 return newself 1498 1499 def setBreak(self, breakFlag=True): 1500 """Method to invoke the Python pdb debugger when this element is 1501 about to be parsed. Set ``breakFlag`` to True to enable, False to 1502 disable. 1503 """ 1504 if breakFlag: 1505 _parseMethod = self._parse 1506 def breaker(instring, loc, doActions=True, callPreParse=True): 1507 import pdb 1508 # this call to pdb.set_trace() is intentional, not a checkin error 1509 pdb.set_trace() 1510 return _parseMethod(instring, loc, doActions, callPreParse) 1511 breaker._originalParseMethod = _parseMethod 1512 self._parse = breaker 1513 else: 1514 if hasattr(self._parse, "_originalParseMethod"): 1515 self._parse = self._parse._originalParseMethod 1516 return self 1517 1518 def setParseAction(self, *fns, **kwargs): 1519 """ 1520 Define one or more actions to perform when successfully matching parse element definition. 1521 Parse action fn is a callable method with 0-3 arguments, called as ``fn(s, loc, toks)`` , 1522 ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where: 1523 1524 - s = the original string being parsed (see note below) 1525 - loc = the location of the matching substring 1526 - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object 1527 1528 If the functions in fns modify the tokens, they can return them as the return 1529 value from fn, and the modified list of tokens will replace the original. 1530 Otherwise, fn does not need to return any value. 1531 1532 If None is passed as the parse action, all previously added parse actions for this 1533 expression are cleared. 1534 1535 Optional keyword arguments: 1536 - callDuringTry = (default= ``False``) indicate if parse action should be run during lookaheads and alternate testing 1537 1538 Note: the default parsing behavior is to expand tabs in the input string 1539 before starting the parsing process. See :class:`parseString for more 1540 information on parsing strings containing ``<TAB>`` s, and suggested 1541 methods to maintain a consistent view of the parsed string, the parse 1542 location, and line and column positions within the parsed string. 1543 1544 Example:: 1545 1546 integer = Word(nums) 1547 date_str = integer + '/' + integer + '/' + integer 1548 1549 date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31'] 1550 1551 # use parse action to convert to ints at parse time 1552 integer = Word(nums).setParseAction(lambda toks: int(toks[0])) 1553 date_str = integer + '/' + integer + '/' + integer 1554 1555 # note that integer fields are now ints, not strings 1556 date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31] 1557 """ 1558 if list(fns) == [None,]: 1559 self.parseAction = [] 1560 else: 1561 if not all(callable(fn) for fn in fns): 1562 raise TypeError("parse actions must be callable") 1563 self.parseAction = list(map(_trim_arity, list(fns))) 1564 self.callDuringTry = kwargs.get("callDuringTry", False) 1565 return self 1566 1567 def addParseAction(self, *fns, **kwargs): 1568 """ 1569 Add one or more parse actions to expression's list of parse actions. See :class:`setParseAction`. 1570 1571 See examples in :class:`copy`. 1572 """ 1573 self.parseAction += list(map(_trim_arity, list(fns))) 1574 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False) 1575 return self 1576 1577 def addCondition(self, *fns, **kwargs): 1578 """Add a boolean predicate function to expression's list of parse actions. See 1579 :class:`setParseAction` for function call signatures. Unlike ``setParseAction``, 1580 functions passed to ``addCondition`` need to return boolean success/fail of the condition. 1581 1582 Optional keyword arguments: 1583 - message = define a custom message to be used in the raised exception 1584 - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException 1585 1586 Example:: 1587 1588 integer = Word(nums).setParseAction(lambda toks: int(toks[0])) 1589 year_int = integer.copy() 1590 year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later") 1591 date_str = year_int + '/' + integer + '/' + integer 1592 1593 result = date_str.parseString("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1) 1594 """ 1595 for fn in fns: 1596 self.parseAction.append(conditionAsParseAction(fn, message=kwargs.get('message'), 1597 fatal=kwargs.get('fatal', False))) 1598 1599 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False) 1600 return self 1601 1602 def setFailAction(self, fn): 1603 """Define action to perform if parsing fails at this expression. 1604 Fail acton fn is a callable function that takes the arguments 1605 ``fn(s, loc, expr, err)`` where: 1606 - s = string being parsed 1607 - loc = location where expression match was attempted and failed 1608 - expr = the parse expression that failed 1609 - err = the exception thrown 1610 The function returns no value. It may throw :class:`ParseFatalException` 1611 if it is desired to stop parsing immediately.""" 1612 self.failAction = fn 1613 return self 1614 1615 def _skipIgnorables(self, instring, loc): 1616 exprsFound = True 1617 while exprsFound: 1618 exprsFound = False 1619 for e in self.ignoreExprs: 1620 try: 1621 while 1: 1622 loc, dummy = e._parse(instring, loc) 1623 exprsFound = True 1624 except ParseException: 1625 pass 1626 return loc 1627 1628 def preParse(self, instring, loc): 1629 if self.ignoreExprs: 1630 loc = self._skipIgnorables(instring, loc) 1631 1632 if self.skipWhitespace: 1633 wt = self.whiteChars 1634 instrlen = len(instring) 1635 while loc < instrlen and instring[loc] in wt: 1636 loc += 1 1637 1638 return loc 1639 1640 def parseImpl(self, instring, loc, doActions=True): 1641 return loc, [] 1642 1643 def postParse(self, instring, loc, tokenlist): 1644 return tokenlist 1645 1646 # ~ @profile 1647 def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True): 1648 TRY, MATCH, FAIL = 0, 1, 2 1649 debugging = (self.debug) # and doActions) 1650 1651 if debugging or self.failAction: 1652 # ~ print ("Match", self, "at loc", loc, "(%d, %d)" % (lineno(loc, instring), col(loc, instring))) 1653 if self.debugActions[TRY]: 1654 self.debugActions[TRY](instring, loc, self) 1655 try: 1656 if callPreParse and self.callPreparse: 1657 preloc = self.preParse(instring, loc) 1658 else: 1659 preloc = loc 1660 tokensStart = preloc 1661 if self.mayIndexError or preloc >= len(instring): 1662 try: 1663 loc, tokens = self.parseImpl(instring, preloc, doActions) 1664 except IndexError: 1665 raise ParseException(instring, len(instring), self.errmsg, self) 1666 else: 1667 loc, tokens = self.parseImpl(instring, preloc, doActions) 1668 except Exception as err: 1669 # ~ print ("Exception raised:", err) 1670 if self.debugActions[FAIL]: 1671 self.debugActions[FAIL](instring, tokensStart, self, err) 1672 if self.failAction: 1673 self.failAction(instring, tokensStart, self, err) 1674 raise 1675 else: 1676 if callPreParse and self.callPreparse: 1677 preloc = self.preParse(instring, loc) 1678 else: 1679 preloc = loc 1680 tokensStart = preloc 1681 if self.mayIndexError or preloc >= len(instring): 1682 try: 1683 loc, tokens = self.parseImpl(instring, preloc, doActions) 1684 except IndexError: 1685 raise ParseException(instring, len(instring), self.errmsg, self) 1686 else: 1687 loc, tokens = self.parseImpl(instring, preloc, doActions) 1688 1689 tokens = self.postParse(instring, loc, tokens) 1690 1691 retTokens = ParseResults(tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults) 1692 if self.parseAction and (doActions or self.callDuringTry): 1693 if debugging: 1694 try: 1695 for fn in self.parseAction: 1696 try: 1697 tokens = fn(instring, tokensStart, retTokens) 1698 except IndexError as parse_action_exc: 1699 exc = ParseException("exception raised in parse action") 1700 exc.__cause__ = parse_action_exc 1701 raise exc 1702 1703 if tokens is not None and tokens is not retTokens: 1704 retTokens = ParseResults(tokens, 1705 self.resultsName, 1706 asList=self.saveAsList and isinstance(tokens, (ParseResults, list)), 1707 modal=self.modalResults) 1708 except Exception as err: 1709 # ~ print "Exception raised in user parse action:", err 1710 if self.debugActions[FAIL]: 1711 self.debugActions[FAIL](instring, tokensStart, self, err) 1712 raise 1713 else: 1714 for fn in self.parseAction: 1715 try: 1716 tokens = fn(instring, tokensStart, retTokens) 1717 except IndexError as parse_action_exc: 1718 exc = ParseException("exception raised in parse action") 1719 exc.__cause__ = parse_action_exc 1720 raise exc 1721 1722 if tokens is not None and tokens is not retTokens: 1723 retTokens = ParseResults(tokens, 1724 self.resultsName, 1725 asList=self.saveAsList and isinstance(tokens, (ParseResults, list)), 1726 modal=self.modalResults) 1727 if debugging: 1728 # ~ print ("Matched", self, "->", retTokens.asList()) 1729 if self.debugActions[MATCH]: 1730 self.debugActions[MATCH](instring, tokensStart, loc, self, retTokens) 1731 1732 return loc, retTokens 1733 1734 def tryParse(self, instring, loc): 1735 try: 1736 return self._parse(instring, loc, doActions=False)[0] 1737 except ParseFatalException: 1738 raise ParseException(instring, loc, self.errmsg, self) 1739 1740 def canParseNext(self, instring, loc): 1741 try: 1742 self.tryParse(instring, loc) 1743 except (ParseException, IndexError): 1744 return False 1745 else: 1746 return True 1747 1748 class _UnboundedCache(object): 1749 def __init__(self): 1750 cache = {} 1751 self.not_in_cache = not_in_cache = object() 1752 1753 def get(self, key): 1754 return cache.get(key, not_in_cache) 1755 1756 def set(self, key, value): 1757 cache[key] = value 1758 1759 def clear(self): 1760 cache.clear() 1761 1762 def cache_len(self): 1763 return len(cache) 1764 1765 self.get = types.MethodType(get, self) 1766 self.set = types.MethodType(set, self) 1767 self.clear = types.MethodType(clear, self) 1768 self.__len__ = types.MethodType(cache_len, self) 1769 1770 if _OrderedDict is not None: 1771 class _FifoCache(object): 1772 def __init__(self, size): 1773 self.not_in_cache = not_in_cache = object() 1774 1775 cache = _OrderedDict() 1776 1777 def get(self, key): 1778 return cache.get(key, not_in_cache) 1779 1780 def set(self, key, value): 1781 cache[key] = value 1782 while len(cache) > size: 1783 try: 1784 cache.popitem(False) 1785 except KeyError: 1786 pass 1787 1788 def clear(self): 1789 cache.clear() 1790 1791 def cache_len(self): 1792 return len(cache) 1793 1794 self.get = types.MethodType(get, self) 1795 self.set = types.MethodType(set, self) 1796 self.clear = types.MethodType(clear, self) 1797 self.__len__ = types.MethodType(cache_len, self) 1798 1799 else: 1800 class _FifoCache(object): 1801 def __init__(self, size): 1802 self.not_in_cache = not_in_cache = object() 1803 1804 cache = {} 1805 key_fifo = collections.deque([], size) 1806 1807 def get(self, key): 1808 return cache.get(key, not_in_cache) 1809 1810 def set(self, key, value): 1811 cache[key] = value 1812 while len(key_fifo) > size: 1813 cache.pop(key_fifo.popleft(), None) 1814 key_fifo.append(key) 1815 1816 def clear(self): 1817 cache.clear() 1818 key_fifo.clear() 1819 1820 def cache_len(self): 1821 return len(cache) 1822 1823 self.get = types.MethodType(get, self) 1824 self.set = types.MethodType(set, self) 1825 self.clear = types.MethodType(clear, self) 1826 self.__len__ = types.MethodType(cache_len, self) 1827 1828 # argument cache for optimizing repeated calls when backtracking through recursive expressions 1829 packrat_cache = {} # this is set later by enabledPackrat(); this is here so that resetCache() doesn't fail 1830 packrat_cache_lock = RLock() 1831 packrat_cache_stats = [0, 0] 1832 1833 # this method gets repeatedly called during backtracking with the same arguments - 1834 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression 1835 def _parseCache(self, instring, loc, doActions=True, callPreParse=True): 1836 HIT, MISS = 0, 1 1837 lookup = (self, instring, loc, callPreParse, doActions) 1838 with ParserElement.packrat_cache_lock: 1839 cache = ParserElement.packrat_cache 1840 value = cache.get(lookup) 1841 if value is cache.not_in_cache: 1842 ParserElement.packrat_cache_stats[MISS] += 1 1843 try: 1844 value = self._parseNoCache(instring, loc, doActions, callPreParse) 1845 except ParseBaseException as pe: 1846 # cache a copy of the exception, without the traceback 1847 cache.set(lookup, pe.__class__(*pe.args)) 1848 raise 1849 else: 1850 cache.set(lookup, (value[0], value[1].copy())) 1851 return value 1852 else: 1853 ParserElement.packrat_cache_stats[HIT] += 1 1854 if isinstance(value, Exception): 1855 raise value 1856 return value[0], value[1].copy() 1857 1858 _parse = _parseNoCache 1859 1860 @staticmethod 1861 def resetCache(): 1862 ParserElement.packrat_cache.clear() 1863 ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats) 1864 1865 _packratEnabled = False 1866 @staticmethod 1867 def enablePackrat(cache_size_limit=128): 1868 """Enables "packrat" parsing, which adds memoizing to the parsing logic. 1869 Repeated parse attempts at the same string location (which happens 1870 often in many complex grammars) can immediately return a cached value, 1871 instead of re-executing parsing/validating code. Memoizing is done of 1872 both valid results and parsing exceptions. 1873 1874 Parameters: 1875 1876 - cache_size_limit - (default= ``128``) - if an integer value is provided 1877 will limit the size of the packrat cache; if None is passed, then 1878 the cache size will be unbounded; if 0 is passed, the cache will 1879 be effectively disabled. 1880 1881 This speedup may break existing programs that use parse actions that 1882 have side-effects. For this reason, packrat parsing is disabled when 1883 you first import pyparsing. To activate the packrat feature, your 1884 program must call the class method :class:`ParserElement.enablePackrat`. 1885 For best results, call ``enablePackrat()`` immediately after 1886 importing pyparsing. 1887 1888 Example:: 1889 1890 import pyparsing 1891 pyparsing.ParserElement.enablePackrat() 1892 """ 1893 if not ParserElement._packratEnabled: 1894 ParserElement._packratEnabled = True 1895 if cache_size_limit is None: 1896 ParserElement.packrat_cache = ParserElement._UnboundedCache() 1897 else: 1898 ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit) 1899 ParserElement._parse = ParserElement._parseCache 1900 1901 def parseString(self, instring, parseAll=False): 1902 """ 1903 Execute the parse expression with the given string. 1904 This is the main interface to the client code, once the complete 1905 expression has been built. 1906 1907 Returns the parsed data as a :class:`ParseResults` object, which may be 1908 accessed as a list, or as a dict or object with attributes if the given parser 1909 includes results names. 1910 1911 If you want the grammar to require that the entire input string be 1912 successfully parsed, then set ``parseAll`` to True (equivalent to ending 1913 the grammar with ``StringEnd()``). 1914 1915 Note: ``parseString`` implicitly calls ``expandtabs()`` on the input string, 1916 in order to report proper column numbers in parse actions. 1917 If the input string contains tabs and 1918 the grammar uses parse actions that use the ``loc`` argument to index into the 1919 string being parsed, you can ensure you have a consistent view of the input 1920 string by: 1921 1922 - calling ``parseWithTabs`` on your grammar before calling ``parseString`` 1923 (see :class:`parseWithTabs`) 1924 - define your parse action using the full ``(s, loc, toks)`` signature, and 1925 reference the input string using the parse action's ``s`` argument 1926 - explictly expand the tabs in your input string before calling 1927 ``parseString`` 1928 1929 Example:: 1930 1931 Word('a').parseString('aaaaabaaa') # -> ['aaaaa'] 1932 Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text 1933 """ 1934 ParserElement.resetCache() 1935 if not self.streamlined: 1936 self.streamline() 1937 # ~ self.saveAsList = True 1938 for e in self.ignoreExprs: 1939 e.streamline() 1940 if not self.keepTabs: 1941 instring = instring.expandtabs() 1942 try: 1943 loc, tokens = self._parse(instring, 0) 1944 if parseAll: 1945 loc = self.preParse(instring, loc) 1946 se = Empty() + StringEnd() 1947 se._parse(instring, loc) 1948 except ParseBaseException as exc: 1949 if ParserElement.verbose_stacktrace: 1950 raise 1951 else: 1952 # catch and re-raise exception from here, clearing out pyparsing internal stack trace 1953 if getattr(exc, '__traceback__', None) is not None: 1954 exc.__traceback__ = self._trim_traceback(exc.__traceback__) 1955 raise exc 1956 else: 1957 return tokens 1958 1959 def scanString(self, instring, maxMatches=_MAX_INT, overlap=False): 1960 """ 1961 Scan the input string for expression matches. Each match will return the 1962 matching tokens, start location, and end location. May be called with optional 1963 ``maxMatches`` argument, to clip scanning after 'n' matches are found. If 1964 ``overlap`` is specified, then overlapping matches will be reported. 1965 1966 Note that the start and end locations are reported relative to the string 1967 being parsed. See :class:`parseString` for more information on parsing 1968 strings with embedded tabs. 1969 1970 Example:: 1971 1972 source = "sldjf123lsdjjkf345sldkjf879lkjsfd987" 1973 print(source) 1974 for tokens, start, end in Word(alphas).scanString(source): 1975 print(' '*start + '^'*(end-start)) 1976 print(' '*start + tokens[0]) 1977 1978 prints:: 1979 1980 sldjf123lsdjjkf345sldkjf879lkjsfd987 1981 ^^^^^ 1982 sldjf 1983 ^^^^^^^ 1984 lsdjjkf 1985 ^^^^^^ 1986 sldkjf 1987 ^^^^^^ 1988 lkjsfd 1989 """ 1990 if not self.streamlined: 1991 self.streamline() 1992 for e in self.ignoreExprs: 1993 e.streamline() 1994 1995 if not self.keepTabs: 1996 instring = _ustr(instring).expandtabs() 1997 instrlen = len(instring) 1998 loc = 0 1999 preparseFn = self.preParse 2000 parseFn = self._parse 2001 ParserElement.resetCache() 2002 matches = 0 2003 try: 2004 while loc <= instrlen and matches < maxMatches: 2005 try: 2006 preloc = preparseFn(instring, loc) 2007 nextLoc, tokens = parseFn(instring, preloc, callPreParse=False) 2008 except ParseException: 2009 loc = preloc + 1 2010 else: 2011 if nextLoc > loc: 2012 matches += 1 2013 yield tokens, preloc, nextLoc 2014 if overlap: 2015 nextloc = preparseFn(instring, loc) 2016 if nextloc > loc: 2017 loc = nextLoc 2018 else: 2019 loc += 1 2020 else: 2021 loc = nextLoc 2022 else: 2023 loc = preloc + 1 2024 except ParseBaseException as exc: 2025 if ParserElement.verbose_stacktrace: 2026 raise 2027 else: 2028 # catch and re-raise exception from here, clearing out pyparsing internal stack trace 2029 if getattr(exc, '__traceback__', None) is not None: 2030 exc.__traceback__ = self._trim_traceback(exc.__traceback__) 2031 raise exc 2032 2033 def transformString(self, instring): 2034 """ 2035 Extension to :class:`scanString`, to modify matching text with modified tokens that may 2036 be returned from a parse action. To use ``transformString``, define a grammar and 2037 attach a parse action to it that modifies the returned token list. 2038 Invoking ``transformString()`` on a target string will then scan for matches, 2039 and replace the matched text patterns according to the logic in the parse 2040 action. ``transformString()`` returns the resulting transformed string. 2041 2042 Example:: 2043 2044 wd = Word(alphas) 2045 wd.setParseAction(lambda toks: toks[0].title()) 2046 2047 print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york.")) 2048 2049 prints:: 2050 2051 Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York. 2052 """ 2053 out = [] 2054 lastE = 0 2055 # force preservation of <TAB>s, to minimize unwanted transformation of string, and to 2056 # keep string locs straight between transformString and scanString 2057 self.keepTabs = True 2058 try: 2059 for t, s, e in self.scanString(instring): 2060 out.append(instring[lastE:s]) 2061 if t: 2062 if isinstance(t, ParseResults): 2063 out += t.asList() 2064 elif isinstance(t, list): 2065 out += t 2066 else: 2067 out.append(t) 2068 lastE = e 2069 out.append(instring[lastE:]) 2070 out = [o for o in out if o] 2071 return "".join(map(_ustr, _flatten(out))) 2072 except ParseBaseException as exc: 2073 if ParserElement.verbose_stacktrace: 2074 raise 2075 else: 2076 # catch and re-raise exception from here, clearing out pyparsing internal stack trace 2077 if getattr(exc, '__traceback__', None) is not None: 2078 exc.__traceback__ = self._trim_traceback(exc.__traceback__) 2079 raise exc 2080 2081 def searchString(self, instring, maxMatches=_MAX_INT): 2082 """ 2083 Another extension to :class:`scanString`, simplifying the access to the tokens found 2084 to match the given parse expression. May be called with optional 2085 ``maxMatches`` argument, to clip searching after 'n' matches are found. 2086 2087 Example:: 2088 2089 # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters 2090 cap_word = Word(alphas.upper(), alphas.lower()) 2091 2092 print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")) 2093 2094 # the sum() builtin can be used to merge results into a single ParseResults object 2095 print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))) 2096 2097 prints:: 2098 2099 [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']] 2100 ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity'] 2101 """ 2102 try: 2103 return ParseResults([t for t, s, e in self.scanString(instring, maxMatches)]) 2104 except ParseBaseException as exc: 2105 if ParserElement.verbose_stacktrace: 2106 raise 2107 else: 2108 # catch and re-raise exception from here, clearing out pyparsing internal stack trace 2109 if getattr(exc, '__traceback__', None) is not None: 2110 exc.__traceback__ = self._trim_traceback(exc.__traceback__) 2111 raise exc 2112 2113 def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False): 2114 """ 2115 Generator method to split a string using the given expression as a separator. 2116 May be called with optional ``maxsplit`` argument, to limit the number of splits; 2117 and the optional ``includeSeparators`` argument (default= ``False``), if the separating 2118 matching text should be included in the split results. 2119 2120 Example:: 2121 2122 punc = oneOf(list(".,;:/-!?")) 2123 print(list(punc.split("This, this?, this sentence, is badly punctuated!"))) 2124 2125 prints:: 2126 2127 ['This', ' this', '', ' this sentence', ' is badly punctuated', ''] 2128 """ 2129 splits = 0 2130 last = 0 2131 for t, s, e in self.scanString(instring, maxMatches=maxsplit): 2132 yield instring[last:s] 2133 if includeSeparators: 2134 yield t[0] 2135 last = e 2136 yield instring[last:] 2137 2138 def __add__(self, other): 2139 """ 2140 Implementation of + operator - returns :class:`And`. Adding strings to a ParserElement 2141 converts them to :class:`Literal`s by default. 2142 2143 Example:: 2144 2145 greet = Word(alphas) + "," + Word(alphas) + "!" 2146 hello = "Hello, World!" 2147 print (hello, "->", greet.parseString(hello)) 2148 2149 prints:: 2150 2151 Hello, World! -> ['Hello', ',', 'World', '!'] 2152 2153 ``...`` may be used as a parse expression as a short form of :class:`SkipTo`. 2154 2155 Literal('start') + ... + Literal('end') 2156 2157 is equivalent to: 2158 2159 Literal('start') + SkipTo('end')("_skipped*") + Literal('end') 2160 2161 Note that the skipped text is returned with '_skipped' as a results name, 2162 and to support having multiple skips in the same parser, the value returned is 2163 a list of all skipped text. 2164 """ 2165 if other is Ellipsis: 2166 return _PendingSkip(self) 2167 2168 if isinstance(other, basestring): 2169 other = self._literalStringClass(other) 2170 if not isinstance(other, ParserElement): 2171 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 2172 SyntaxWarning, stacklevel=2) 2173 return None 2174 return And([self, other]) 2175 2176 def __radd__(self, other): 2177 """ 2178 Implementation of + operator when left operand is not a :class:`ParserElement` 2179 """ 2180 if other is Ellipsis: 2181 return SkipTo(self)("_skipped*") + self 2182 2183 if isinstance(other, basestring): 2184 other = self._literalStringClass(other) 2185 if not isinstance(other, ParserElement): 2186 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 2187 SyntaxWarning, stacklevel=2) 2188 return None 2189 return other + self 2190 2191 def __sub__(self, other): 2192 """ 2193 Implementation of - operator, returns :class:`And` with error stop 2194 """ 2195 if isinstance(other, basestring): 2196 other = self._literalStringClass(other) 2197 if not isinstance(other, ParserElement): 2198 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 2199 SyntaxWarning, stacklevel=2) 2200 return None 2201 return self + And._ErrorStop() + other 2202 2203 def __rsub__(self, other): 2204 """ 2205 Implementation of - operator when left operand is not a :class:`ParserElement` 2206 """ 2207 if isinstance(other, basestring): 2208 other = self._literalStringClass(other) 2209 if not isinstance(other, ParserElement): 2210 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 2211 SyntaxWarning, stacklevel=2) 2212 return None 2213 return other - self 2214 2215 def __mul__(self, other): 2216 """ 2217 Implementation of * operator, allows use of ``expr * 3`` in place of 2218 ``expr + expr + expr``. Expressions may also me multiplied by a 2-integer 2219 tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples 2220 may also include ``None`` as in: 2221 - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent 2222 to ``expr*n + ZeroOrMore(expr)`` 2223 (read as "at least n instances of ``expr``") 2224 - ``expr*(None, n)`` is equivalent to ``expr*(0, n)`` 2225 (read as "0 to n instances of ``expr``") 2226 - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)`` 2227 - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)`` 2228 2229 Note that ``expr*(None, n)`` does not raise an exception if 2230 more than n exprs exist in the input stream; that is, 2231 ``expr*(None, n)`` does not enforce a maximum number of expr 2232 occurrences. If this behavior is desired, then write 2233 ``expr*(None, n) + ~expr`` 2234 """ 2235 if other is Ellipsis: 2236 other = (0, None) 2237 elif isinstance(other, tuple) and other[:1] == (Ellipsis,): 2238 other = ((0, ) + other[1:] + (None,))[:2] 2239 2240 if isinstance(other, int): 2241 minElements, optElements = other, 0 2242 elif isinstance(other, tuple): 2243 other = tuple(o if o is not Ellipsis else None for o in other) 2244 other = (other + (None, None))[:2] 2245 if other[0] is None: 2246 other = (0, other[1]) 2247 if isinstance(other[0], int) and other[1] is None: 2248 if other[0] == 0: 2249 return ZeroOrMore(self) 2250 if other[0] == 1: 2251 return OneOrMore(self) 2252 else: 2253 return self * other[0] + ZeroOrMore(self) 2254 elif isinstance(other[0], int) and isinstance(other[1], int): 2255 minElements, optElements = other 2256 optElements -= minElements 2257 else: 2258 raise TypeError("cannot multiply 'ParserElement' and ('%s', '%s') objects", type(other[0]), type(other[1])) 2259 else: 2260 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) 2261 2262 if minElements < 0: 2263 raise ValueError("cannot multiply ParserElement by negative value") 2264 if optElements < 0: 2265 raise ValueError("second tuple value must be greater or equal to first tuple value") 2266 if minElements == optElements == 0: 2267 raise ValueError("cannot multiply ParserElement by 0 or (0, 0)") 2268 2269 if optElements: 2270 def makeOptionalList(n): 2271 if n > 1: 2272 return Optional(self + makeOptionalList(n - 1)) 2273 else: 2274 return Optional(self) 2275 if minElements: 2276 if minElements == 1: 2277 ret = self + makeOptionalList(optElements) 2278 else: 2279 ret = And([self] * minElements) + makeOptionalList(optElements) 2280 else: 2281 ret = makeOptionalList(optElements) 2282 else: 2283 if minElements == 1: 2284 ret = self 2285 else: 2286 ret = And([self] * minElements) 2287 return ret 2288 2289 def __rmul__(self, other): 2290 return self.__mul__(other) 2291 2292 def __or__(self, other): 2293 """ 2294 Implementation of | operator - returns :class:`MatchFirst` 2295 """ 2296 if other is Ellipsis: 2297 return _PendingSkip(self, must_skip=True) 2298 2299 if isinstance(other, basestring): 2300 other = self._literalStringClass(other) 2301 if not isinstance(other, ParserElement): 2302 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 2303 SyntaxWarning, stacklevel=2) 2304 return None 2305 return MatchFirst([self, other]) 2306 2307 def __ror__(self, other): 2308 """ 2309 Implementation of | operator when left operand is not a :class:`ParserElement` 2310 """ 2311 if isinstance(other, basestring): 2312 other = self._literalStringClass(other) 2313 if not isinstance(other, ParserElement): 2314 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 2315 SyntaxWarning, stacklevel=2) 2316 return None 2317 return other | self 2318 2319 def __xor__(self, other): 2320 """ 2321 Implementation of ^ operator - returns :class:`Or` 2322 """ 2323 if isinstance(other, basestring): 2324 other = self._literalStringClass(other) 2325 if not isinstance(other, ParserElement): 2326 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 2327 SyntaxWarning, stacklevel=2) 2328 return None 2329 return Or([self, other]) 2330 2331 def __rxor__(self, other): 2332 """ 2333 Implementation of ^ operator when left operand is not a :class:`ParserElement` 2334 """ 2335 if isinstance(other, basestring): 2336 other = self._literalStringClass(other) 2337 if not isinstance(other, ParserElement): 2338 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 2339 SyntaxWarning, stacklevel=2) 2340 return None 2341 return other ^ self 2342 2343 def __and__(self, other): 2344 """ 2345 Implementation of & operator - returns :class:`Each` 2346 """ 2347 if isinstance(other, basestring): 2348 other = self._literalStringClass(other) 2349 if not isinstance(other, ParserElement): 2350 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 2351 SyntaxWarning, stacklevel=2) 2352 return None 2353 return Each([self, other]) 2354 2355 def __rand__(self, other): 2356 """ 2357 Implementation of & operator when left operand is not a :class:`ParserElement` 2358 """ 2359 if isinstance(other, basestring): 2360 other = self._literalStringClass(other) 2361 if not isinstance(other, ParserElement): 2362 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other), 2363 SyntaxWarning, stacklevel=2) 2364 return None 2365 return other & self 2366 2367 def __invert__(self): 2368 """ 2369 Implementation of ~ operator - returns :class:`NotAny` 2370 """ 2371 return NotAny(self) 2372 2373 def __iter__(self): 2374 # must implement __iter__ to override legacy use of sequential access to __getitem__ to 2375 # iterate over a sequence 2376 raise TypeError('%r object is not iterable' % self.__class__.__name__) 2377 2378 def __getitem__(self, key): 2379 """ 2380 use ``[]`` indexing notation as a short form for expression repetition: 2381 - ``expr[n]`` is equivalent to ``expr*n`` 2382 - ``expr[m, n]`` is equivalent to ``expr*(m, n)`` 2383 - ``expr[n, ...]`` or ``expr[n,]`` is equivalent 2384 to ``expr*n + ZeroOrMore(expr)`` 2385 (read as "at least n instances of ``expr``") 2386 - ``expr[..., n]`` is equivalent to ``expr*(0, n)`` 2387 (read as "0 to n instances of ``expr``") 2388 - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)`` 2389 - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)`` 2390 ``None`` may be used in place of ``...``. 2391 2392 Note that ``expr[..., n]`` and ``expr[m, n]``do not raise an exception 2393 if more than ``n`` ``expr``s exist in the input stream. If this behavior is 2394 desired, then write ``expr[..., n] + ~expr``. 2395 """ 2396 2397 # convert single arg keys to tuples 2398 try: 2399 if isinstance(key, str): 2400 key = (key,) 2401 iter(key) 2402 except TypeError: 2403 key = (key, key) 2404 2405 if len(key) > 2: 2406 warnings.warn("only 1 or 2 index arguments supported ({0}{1})".format(key[:5], 2407 '... [{0}]'.format(len(key)) 2408 if len(key) > 5 else '')) 2409 2410 # clip to 2 elements 2411 ret = self * tuple(key[:2]) 2412 return ret 2413 2414 def __call__(self, name=None): 2415 """ 2416 Shortcut for :class:`setResultsName`, with ``listAllMatches=False``. 2417 2418 If ``name`` is given with a trailing ``'*'`` character, then ``listAllMatches`` will be 2419 passed as ``True``. 2420 2421 If ``name` is omitted, same as calling :class:`copy`. 2422 2423 Example:: 2424 2425 # these are equivalent 2426 userdata = Word(alphas).setResultsName("name") + Word(nums + "-").setResultsName("socsecno") 2427 userdata = Word(alphas)("name") + Word(nums + "-")("socsecno") 2428 """ 2429 if name is not None: 2430 return self._setResultsName(name) 2431 else: 2432 return self.copy() 2433 2434 def suppress(self): 2435 """ 2436 Suppresses the output of this :class:`ParserElement`; useful to keep punctuation from 2437 cluttering up returned output. 2438 """ 2439 return Suppress(self) 2440 2441 def leaveWhitespace(self): 2442 """ 2443 Disables the skipping of whitespace before matching the characters in the 2444 :class:`ParserElement`'s defined pattern. This is normally only used internally by 2445 the pyparsing module, but may be needed in some whitespace-sensitive grammars. 2446 """ 2447 self.skipWhitespace = False 2448 return self 2449 2450 def setWhitespaceChars(self, chars): 2451 """ 2452 Overrides the default whitespace chars 2453 """ 2454 self.skipWhitespace = True 2455 self.whiteChars = chars 2456 self.copyDefaultWhiteChars = False 2457 return self 2458 2459 def parseWithTabs(self): 2460 """ 2461 Overrides default behavior to expand ``<TAB>``s to spaces before parsing the input string. 2462 Must be called before ``parseString`` when the input grammar contains elements that 2463 match ``<TAB>`` characters. 2464 """ 2465 self.keepTabs = True 2466 return self 2467 2468 def ignore(self, other): 2469 """ 2470 Define expression to be ignored (e.g., comments) while doing pattern 2471 matching; may be called repeatedly, to define multiple comment or other 2472 ignorable patterns. 2473 2474 Example:: 2475 2476 patt = OneOrMore(Word(alphas)) 2477 patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj'] 2478 2479 patt.ignore(cStyleComment) 2480 patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd'] 2481 """ 2482 if isinstance(other, basestring): 2483 other = Suppress(other) 2484 2485 if isinstance(other, Suppress): 2486 if other not in self.ignoreExprs: 2487 self.ignoreExprs.append(other) 2488 else: 2489 self.ignoreExprs.append(Suppress(other.copy())) 2490 return self 2491 2492 def setDebugActions(self, startAction, successAction, exceptionAction): 2493 """ 2494 Enable display of debugging messages while doing pattern matching. 2495 """ 2496 self.debugActions = (startAction or _defaultStartDebugAction, 2497 successAction or _defaultSuccessDebugAction, 2498 exceptionAction or _defaultExceptionDebugAction) 2499 self.debug = True 2500 return self 2501 2502 def setDebug(self, flag=True): 2503 """ 2504 Enable display of debugging messages while doing pattern matching. 2505 Set ``flag`` to True to enable, False to disable. 2506 2507 Example:: 2508 2509 wd = Word(alphas).setName("alphaword") 2510 integer = Word(nums).setName("numword") 2511 term = wd | integer 2512 2513 # turn on debugging for wd 2514 wd.setDebug() 2515 2516 OneOrMore(term).parseString("abc 123 xyz 890") 2517 2518 prints:: 2519 2520 Match alphaword at loc 0(1,1) 2521 Matched alphaword -> ['abc'] 2522 Match alphaword at loc 3(1,4) 2523 Exception raised:Expected alphaword (at char 4), (line:1, col:5) 2524 Match alphaword at loc 7(1,8) 2525 Matched alphaword -> ['xyz'] 2526 Match alphaword at loc 11(1,12) 2527 Exception raised:Expected alphaword (at char 12), (line:1, col:13) 2528 Match alphaword at loc 15(1,16) 2529 Exception raised:Expected alphaword (at char 15), (line:1, col:16) 2530 2531 The output shown is that produced by the default debug actions - custom debug actions can be 2532 specified using :class:`setDebugActions`. Prior to attempting 2533 to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"`` 2534 is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"`` 2535 message is shown. Also note the use of :class:`setName` to assign a human-readable name to the expression, 2536 which makes debugging and exception messages easier to understand - for instance, the default 2537 name created for the :class:`Word` expression without calling ``setName`` is ``"W:(ABCD...)"``. 2538 """ 2539 if flag: 2540 self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction) 2541 else: 2542 self.debug = False 2543 return self 2544 2545 def __str__(self): 2546 return self.name 2547 2548 def __repr__(self): 2549 return _ustr(self) 2550 2551 def streamline(self): 2552 self.streamlined = True 2553 self.strRepr = None 2554 return self 2555 2556 def checkRecursion(self, parseElementList): 2557 pass 2558 2559 def validate(self, validateTrace=None): 2560 """ 2561 Check defined expressions for valid structure, check for infinite recursive definitions. 2562 """ 2563 self.checkRecursion([]) 2564 2565 def parseFile(self, file_or_filename, parseAll=False): 2566 """ 2567 Execute the parse expression on the given file or filename. 2568 If a filename is specified (instead of a file object), 2569 the entire file is opened, read, and closed before parsing. 2570 """ 2571 try: 2572 file_contents = file_or_filename.read() 2573 except AttributeError: 2574 with open(file_or_filename, "r") as f: 2575 file_contents = f.read() 2576 try: 2577 return self.parseString(file_contents, parseAll) 2578 except ParseBaseException as exc: 2579 if ParserElement.verbose_stacktrace: 2580 raise 2581 else: 2582 # catch and re-raise exception from here, clearing out pyparsing internal stack trace 2583 if getattr(exc, '__traceback__', None) is not None: 2584 exc.__traceback__ = self._trim_traceback(exc.__traceback__) 2585 raise exc 2586 2587 def __eq__(self, other): 2588 if self is other: 2589 return True 2590 elif isinstance(other, basestring): 2591 return self.matches(other) 2592 elif isinstance(other, ParserElement): 2593 return vars(self) == vars(other) 2594 return False 2595 2596 def __ne__(self, other): 2597 return not (self == other) 2598 2599 def __hash__(self): 2600 return id(self) 2601 2602 def __req__(self, other): 2603 return self == other 2604 2605 def __rne__(self, other): 2606 return not (self == other) 2607 2608 def matches(self, testString, parseAll=True): 2609 """ 2610 Method for quick testing of a parser against a test string. Good for simple 2611 inline microtests of sub expressions while building up larger parser. 2612 2613 Parameters: 2614 - testString - to test against this expression for a match 2615 - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests 2616 2617 Example:: 2618 2619 expr = Word(nums) 2620 assert expr.matches("100") 2621 """ 2622 try: 2623 self.parseString(_ustr(testString), parseAll=parseAll) 2624 return True 2625 except ParseBaseException: 2626 return False 2627 2628 def runTests(self, tests, parseAll=True, comment='#', 2629 fullDump=True, printResults=True, failureTests=False, postParse=None, 2630 file=None): 2631 """ 2632 Execute the parse expression on a series of test strings, showing each 2633 test, the parsed results or where the parse failed. Quick and easy way to 2634 run a parse expression against a list of sample strings. 2635 2636 Parameters: 2637 - tests - a list of separate test strings, or a multiline string of test strings 2638 - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests 2639 - comment - (default= ``'#'``) - expression for indicating embedded comments in the test 2640 string; pass None to disable comment filtering 2641 - fullDump - (default= ``True``) - dump results as list followed by results names in nested outline; 2642 if False, only dump nested list 2643 - printResults - (default= ``True``) prints test output to stdout 2644 - failureTests - (default= ``False``) indicates if these tests are expected to fail parsing 2645 - postParse - (default= ``None``) optional callback for successful parse results; called as 2646 `fn(test_string, parse_results)` and returns a string to be added to the test output 2647 - file - (default=``None``) optional file-like object to which test output will be written; 2648 if None, will default to ``sys.stdout`` 2649 2650 Returns: a (success, results) tuple, where success indicates that all tests succeeded 2651 (or failed if ``failureTests`` is True), and the results contain a list of lines of each 2652 test's output 2653 2654 Example:: 2655 2656 number_expr = pyparsing_common.number.copy() 2657 2658 result = number_expr.runTests(''' 2659 # unsigned integer 2660 100 2661 # negative integer 2662 -100 2663 # float with scientific notation 2664 6.02e23 2665 # integer with scientific notation 2666 1e-12 2667 ''') 2668 print("Success" if result[0] else "Failed!") 2669 2670 result = number_expr.runTests(''' 2671 # stray character 2672 100Z 2673 # missing leading digit before '.' 2674 -.100 2675 # too many '.' 2676 3.14.159 2677 ''', failureTests=True) 2678 print("Success" if result[0] else "Failed!") 2679 2680 prints:: 2681 2682 # unsigned integer 2683 100 2684 [100] 2685 2686 # negative integer 2687 -100 2688 [-100] 2689 2690 # float with scientific notation 2691 6.02e23 2692 [6.02e+23] 2693 2694 # integer with scientific notation 2695 1e-12 2696 [1e-12] 2697 2698 Success 2699 2700 # stray character 2701 100Z 2702 ^ 2703 FAIL: Expected end of text (at char 3), (line:1, col:4) 2704 2705 # missing leading digit before '.' 2706 -.100 2707 ^ 2708 FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1) 2709 2710 # too many '.' 2711 3.14.159 2712 ^ 2713 FAIL: Expected end of text (at char 4), (line:1, col:5) 2714 2715 Success 2716 2717 Each test string must be on a single line. If you want to test a string that spans multiple 2718 lines, create a test like this:: 2719 2720 expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines") 2721 2722 (Note that this is a raw string literal, you must include the leading 'r'.) 2723 """ 2724 if isinstance(tests, basestring): 2725 tests = list(map(str.strip, tests.rstrip().splitlines())) 2726 if isinstance(comment, basestring): 2727 comment = Literal(comment) 2728 if file is None: 2729 file = sys.stdout 2730 print_ = file.write 2731 2732 allResults = [] 2733 comments = [] 2734 success = True 2735 NL = Literal(r'\n').addParseAction(replaceWith('\n')).ignore(quotedString) 2736 BOM = u'\ufeff' 2737 for t in tests: 2738 if comment is not None and comment.matches(t, False) or comments and not t: 2739 comments.append(t) 2740 continue 2741 if not t: 2742 continue 2743 out = ['\n' + '\n'.join(comments) if comments else '', t] 2744 comments = [] 2745 try: 2746 # convert newline marks to actual newlines, and strip leading BOM if present 2747 t = NL.transformString(t.lstrip(BOM)) 2748 result = self.parseString(t, parseAll=parseAll) 2749 except ParseBaseException as pe: 2750 fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else "" 2751 if '\n' in t: 2752 out.append(line(pe.loc, t)) 2753 out.append(' ' * (col(pe.loc, t) - 1) + '^' + fatal) 2754 else: 2755 out.append(' ' * pe.loc + '^' + fatal) 2756 out.append("FAIL: " + str(pe)) 2757 success = success and failureTests 2758 result = pe 2759 except Exception as exc: 2760 out.append("FAIL-EXCEPTION: " + str(exc)) 2761 success = success and failureTests 2762 result = exc 2763 else: 2764 success = success and not failureTests 2765 if postParse is not None: 2766 try: 2767 pp_value = postParse(t, result) 2768 if pp_value is not None: 2769 if isinstance(pp_value, ParseResults): 2770 out.append(pp_value.dump()) 2771 else: 2772 out.append(str(pp_value)) 2773 else: 2774 out.append(result.dump()) 2775 except Exception as e: 2776 out.append(result.dump(full=fullDump)) 2777 out.append("{0} failed: {1}: {2}".format(postParse.__name__, type(e).__name__, e)) 2778 else: 2779 out.append(result.dump(full=fullDump)) 2780 2781 if printResults: 2782 if fullDump: 2783 out.append('') 2784 print_('\n'.join(out)) 2785 2786 allResults.append((t, result)) 2787 2788 return success, allResults 2789 2790 2791class _PendingSkip(ParserElement): 2792 # internal placeholder class to hold a place were '...' is added to a parser element, 2793 # once another ParserElement is added, this placeholder will be replaced with a SkipTo 2794 def __init__(self, expr, must_skip=False): 2795 super(_PendingSkip, self).__init__() 2796 self.strRepr = str(expr + Empty()).replace('Empty', '...') 2797 self.name = self.strRepr 2798 self.anchor = expr 2799 self.must_skip = must_skip 2800 2801 def __add__(self, other): 2802 skipper = SkipTo(other).setName("...")("_skipped*") 2803 if self.must_skip: 2804 def must_skip(t): 2805 if not t._skipped or t._skipped.asList() == ['']: 2806 del t[0] 2807 t.pop("_skipped", None) 2808 def show_skip(t): 2809 if t._skipped.asList()[-1:] == ['']: 2810 skipped = t.pop('_skipped') 2811 t['_skipped'] = 'missing <' + repr(self.anchor) + '>' 2812 return (self.anchor + skipper().addParseAction(must_skip) 2813 | skipper().addParseAction(show_skip)) + other 2814 2815 return self.anchor + skipper + other 2816 2817 def __repr__(self): 2818 return self.strRepr 2819 2820 def parseImpl(self, *args): 2821 raise Exception("use of `...` expression without following SkipTo target expression") 2822 2823 2824class Token(ParserElement): 2825 """Abstract :class:`ParserElement` subclass, for defining atomic 2826 matching patterns. 2827 """ 2828 def __init__(self): 2829 super(Token, self).__init__(savelist=False) 2830 2831 2832class Empty(Token): 2833 """An empty token, will always match. 2834 """ 2835 def __init__(self): 2836 super(Empty, self).__init__() 2837 self.name = "Empty" 2838 self.mayReturnEmpty = True 2839 self.mayIndexError = False 2840 2841 2842class NoMatch(Token): 2843 """A token that will never match. 2844 """ 2845 def __init__(self): 2846 super(NoMatch, self).__init__() 2847 self.name = "NoMatch" 2848 self.mayReturnEmpty = True 2849 self.mayIndexError = False 2850 self.errmsg = "Unmatchable token" 2851 2852 def parseImpl(self, instring, loc, doActions=True): 2853 raise ParseException(instring, loc, self.errmsg, self) 2854 2855 2856class Literal(Token): 2857 """Token to exactly match a specified string. 2858 2859 Example:: 2860 2861 Literal('blah').parseString('blah') # -> ['blah'] 2862 Literal('blah').parseString('blahfooblah') # -> ['blah'] 2863 Literal('blah').parseString('bla') # -> Exception: Expected "blah" 2864 2865 For case-insensitive matching, use :class:`CaselessLiteral`. 2866 2867 For keyword matching (force word break before and after the matched string), 2868 use :class:`Keyword` or :class:`CaselessKeyword`. 2869 """ 2870 def __init__(self, matchString): 2871 super(Literal, self).__init__() 2872 self.match = matchString 2873 self.matchLen = len(matchString) 2874 try: 2875 self.firstMatchChar = matchString[0] 2876 except IndexError: 2877 warnings.warn("null string passed to Literal; use Empty() instead", 2878 SyntaxWarning, stacklevel=2) 2879 self.__class__ = Empty 2880 self.name = '"%s"' % _ustr(self.match) 2881 self.errmsg = "Expected " + self.name 2882 self.mayReturnEmpty = False 2883 self.mayIndexError = False 2884 2885 # Performance tuning: modify __class__ to select 2886 # a parseImpl optimized for single-character check 2887 if self.matchLen == 1 and type(self) is Literal: 2888 self.__class__ = _SingleCharLiteral 2889 2890 def parseImpl(self, instring, loc, doActions=True): 2891 if instring[loc] == self.firstMatchChar and instring.startswith(self.match, loc): 2892 return loc + self.matchLen, self.match 2893 raise ParseException(instring, loc, self.errmsg, self) 2894 2895class _SingleCharLiteral(Literal): 2896 def parseImpl(self, instring, loc, doActions=True): 2897 if instring[loc] == self.firstMatchChar: 2898 return loc + 1, self.match 2899 raise ParseException(instring, loc, self.errmsg, self) 2900 2901_L = Literal 2902ParserElement._literalStringClass = Literal 2903 2904class Keyword(Token): 2905 """Token to exactly match a specified string as a keyword, that is, 2906 it must be immediately followed by a non-keyword character. Compare 2907 with :class:`Literal`: 2908 2909 - ``Literal("if")`` will match the leading ``'if'`` in 2910 ``'ifAndOnlyIf'``. 2911 - ``Keyword("if")`` will not; it will only match the leading 2912 ``'if'`` in ``'if x=1'``, or ``'if(y==2)'`` 2913 2914 Accepts two optional constructor arguments in addition to the 2915 keyword string: 2916 2917 - ``identChars`` is a string of characters that would be valid 2918 identifier characters, defaulting to all alphanumerics + "_" and 2919 "$" 2920 - ``caseless`` allows case-insensitive matching, default is ``False``. 2921 2922 Example:: 2923 2924 Keyword("start").parseString("start") # -> ['start'] 2925 Keyword("start").parseString("starting") # -> Exception 2926 2927 For case-insensitive matching, use :class:`CaselessKeyword`. 2928 """ 2929 DEFAULT_KEYWORD_CHARS = alphanums + "_$" 2930 2931 def __init__(self, matchString, identChars=None, caseless=False): 2932 super(Keyword, self).__init__() 2933 if identChars is None: 2934 identChars = Keyword.DEFAULT_KEYWORD_CHARS 2935 self.match = matchString 2936 self.matchLen = len(matchString) 2937 try: 2938 self.firstMatchChar = matchString[0] 2939 except IndexError: 2940 warnings.warn("null string passed to Keyword; use Empty() instead", 2941 SyntaxWarning, stacklevel=2) 2942 self.name = '"%s"' % self.match 2943 self.errmsg = "Expected " + self.name 2944 self.mayReturnEmpty = False 2945 self.mayIndexError = False 2946 self.caseless = caseless 2947 if caseless: 2948 self.caselessmatch = matchString.upper() 2949 identChars = identChars.upper() 2950 self.identChars = set(identChars) 2951 2952 def parseImpl(self, instring, loc, doActions=True): 2953 if self.caseless: 2954 if ((instring[loc:loc + self.matchLen].upper() == self.caselessmatch) 2955 and (loc >= len(instring) - self.matchLen 2956 or instring[loc + self.matchLen].upper() not in self.identChars) 2957 and (loc == 0 2958 or instring[loc - 1].upper() not in self.identChars)): 2959 return loc + self.matchLen, self.match 2960 2961 else: 2962 if instring[loc] == self.firstMatchChar: 2963 if ((self.matchLen == 1 or instring.startswith(self.match, loc)) 2964 and (loc >= len(instring) - self.matchLen 2965 or instring[loc + self.matchLen] not in self.identChars) 2966 and (loc == 0 or instring[loc - 1] not in self.identChars)): 2967 return loc + self.matchLen, self.match 2968 2969 raise ParseException(instring, loc, self.errmsg, self) 2970 2971 def copy(self): 2972 c = super(Keyword, self).copy() 2973 c.identChars = Keyword.DEFAULT_KEYWORD_CHARS 2974 return c 2975 2976 @staticmethod 2977 def setDefaultKeywordChars(chars): 2978 """Overrides the default Keyword chars 2979 """ 2980 Keyword.DEFAULT_KEYWORD_CHARS = chars 2981 2982class CaselessLiteral(Literal): 2983 """Token to match a specified string, ignoring case of letters. 2984 Note: the matched results will always be in the case of the given 2985 match string, NOT the case of the input text. 2986 2987 Example:: 2988 2989 OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD'] 2990 2991 (Contrast with example for :class:`CaselessKeyword`.) 2992 """ 2993 def __init__(self, matchString): 2994 super(CaselessLiteral, self).__init__(matchString.upper()) 2995 # Preserve the defining literal. 2996 self.returnString = matchString 2997 self.name = "'%s'" % self.returnString 2998 self.errmsg = "Expected " + self.name 2999 3000 def parseImpl(self, instring, loc, doActions=True): 3001 if instring[loc:loc + self.matchLen].upper() == self.match: 3002 return loc + self.matchLen, self.returnString 3003 raise ParseException(instring, loc, self.errmsg, self) 3004 3005class CaselessKeyword(Keyword): 3006 """ 3007 Caseless version of :class:`Keyword`. 3008 3009 Example:: 3010 3011 OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD'] 3012 3013 (Contrast with example for :class:`CaselessLiteral`.) 3014 """ 3015 def __init__(self, matchString, identChars=None): 3016 super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True) 3017 3018class CloseMatch(Token): 3019 """A variation on :class:`Literal` which matches "close" matches, 3020 that is, strings with at most 'n' mismatching characters. 3021 :class:`CloseMatch` takes parameters: 3022 3023 - ``match_string`` - string to be matched 3024 - ``maxMismatches`` - (``default=1``) maximum number of 3025 mismatches allowed to count as a match 3026 3027 The results from a successful parse will contain the matched text 3028 from the input string and the following named results: 3029 3030 - ``mismatches`` - a list of the positions within the 3031 match_string where mismatches were found 3032 - ``original`` - the original match_string used to compare 3033 against the input string 3034 3035 If ``mismatches`` is an empty list, then the match was an exact 3036 match. 3037 3038 Example:: 3039 3040 patt = CloseMatch("ATCATCGAATGGA") 3041 patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']}) 3042 patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1) 3043 3044 # exact match 3045 patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']}) 3046 3047 # close match allowing up to 2 mismatches 3048 patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2) 3049 patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']}) 3050 """ 3051 def __init__(self, match_string, maxMismatches=1): 3052 super(CloseMatch, self).__init__() 3053 self.name = match_string 3054 self.match_string = match_string 3055 self.maxMismatches = maxMismatches 3056 self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches) 3057 self.mayIndexError = False 3058 self.mayReturnEmpty = False 3059 3060 def parseImpl(self, instring, loc, doActions=True): 3061 start = loc 3062 instrlen = len(instring) 3063 maxloc = start + len(self.match_string) 3064 3065 if maxloc <= instrlen: 3066 match_string = self.match_string 3067 match_stringloc = 0 3068 mismatches = [] 3069 maxMismatches = self.maxMismatches 3070 3071 for match_stringloc, s_m in enumerate(zip(instring[loc:maxloc], match_string)): 3072 src, mat = s_m 3073 if src != mat: 3074 mismatches.append(match_stringloc) 3075 if len(mismatches) > maxMismatches: 3076 break 3077 else: 3078 loc = match_stringloc + 1 3079 results = ParseResults([instring[start:loc]]) 3080 results['original'] = match_string 3081 results['mismatches'] = mismatches 3082 return loc, results 3083 3084 raise ParseException(instring, loc, self.errmsg, self) 3085 3086 3087class Word(Token): 3088 """Token for matching words composed of allowed character sets. 3089 Defined with string containing all allowed initial characters, an 3090 optional string containing allowed body characters (if omitted, 3091 defaults to the initial character set), and an optional minimum, 3092 maximum, and/or exact length. The default value for ``min`` is 3093 1 (a minimum value < 1 is not valid); the default values for 3094 ``max`` and ``exact`` are 0, meaning no maximum or exact 3095 length restriction. An optional ``excludeChars`` parameter can 3096 list characters that might be found in the input ``bodyChars`` 3097 string; useful to define a word of all printables except for one or 3098 two characters, for instance. 3099 3100 :class:`srange` is useful for defining custom character set strings 3101 for defining ``Word`` expressions, using range notation from 3102 regular expression character sets. 3103 3104 A common mistake is to use :class:`Word` to match a specific literal 3105 string, as in ``Word("Address")``. Remember that :class:`Word` 3106 uses the string argument to define *sets* of matchable characters. 3107 This expression would match "Add", "AAA", "dAred", or any other word 3108 made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an 3109 exact literal string, use :class:`Literal` or :class:`Keyword`. 3110 3111 pyparsing includes helper strings for building Words: 3112 3113 - :class:`alphas` 3114 - :class:`nums` 3115 - :class:`alphanums` 3116 - :class:`hexnums` 3117 - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255 3118 - accented, tilded, umlauted, etc.) 3119 - :class:`punc8bit` (non-alphabetic characters in ASCII range 3120 128-255 - currency, symbols, superscripts, diacriticals, etc.) 3121 - :class:`printables` (any non-whitespace character) 3122 3123 Example:: 3124 3125 # a word composed of digits 3126 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9")) 3127 3128 # a word with a leading capital, and zero or more lowercase 3129 capital_word = Word(alphas.upper(), alphas.lower()) 3130 3131 # hostnames are alphanumeric, with leading alpha, and '-' 3132 hostname = Word(alphas, alphanums + '-') 3133 3134 # roman numeral (not a strict parser, accepts invalid mix of characters) 3135 roman = Word("IVXLCDM") 3136 3137 # any string of non-whitespace characters, except for ',' 3138 csv_value = Word(printables, excludeChars=",") 3139 """ 3140 def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None): 3141 super(Word, self).__init__() 3142 if excludeChars: 3143 excludeChars = set(excludeChars) 3144 initChars = ''.join(c for c in initChars if c not in excludeChars) 3145 if bodyChars: 3146 bodyChars = ''.join(c for c in bodyChars if c not in excludeChars) 3147 self.initCharsOrig = initChars 3148 self.initChars = set(initChars) 3149 if bodyChars: 3150 self.bodyCharsOrig = bodyChars 3151 self.bodyChars = set(bodyChars) 3152 else: 3153 self.bodyCharsOrig = initChars 3154 self.bodyChars = set(initChars) 3155 3156 self.maxSpecified = max > 0 3157 3158 if min < 1: 3159 raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted") 3160 3161 self.minLen = min 3162 3163 if max > 0: 3164 self.maxLen = max 3165 else: 3166 self.maxLen = _MAX_INT 3167 3168 if exact > 0: 3169 self.maxLen = exact 3170 self.minLen = exact 3171 3172 self.name = _ustr(self) 3173 self.errmsg = "Expected " + self.name 3174 self.mayIndexError = False 3175 self.asKeyword = asKeyword 3176 3177 if ' ' not in self.initCharsOrig + self.bodyCharsOrig and (min == 1 and max == 0 and exact == 0): 3178 if self.bodyCharsOrig == self.initCharsOrig: 3179 self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig) 3180 elif len(self.initCharsOrig) == 1: 3181 self.reString = "%s[%s]*" % (re.escape(self.initCharsOrig), 3182 _escapeRegexRangeChars(self.bodyCharsOrig),) 3183 else: 3184 self.reString = "[%s][%s]*" % (_escapeRegexRangeChars(self.initCharsOrig), 3185 _escapeRegexRangeChars(self.bodyCharsOrig),) 3186 if self.asKeyword: 3187 self.reString = r"\b" + self.reString + r"\b" 3188 3189 try: 3190 self.re = re.compile(self.reString) 3191 except Exception: 3192 self.re = None 3193 else: 3194 self.re_match = self.re.match 3195 self.__class__ = _WordRegex 3196 3197 def parseImpl(self, instring, loc, doActions=True): 3198 if instring[loc] not in self.initChars: 3199 raise ParseException(instring, loc, self.errmsg, self) 3200 3201 start = loc 3202 loc += 1 3203 instrlen = len(instring) 3204 bodychars = self.bodyChars 3205 maxloc = start + self.maxLen 3206 maxloc = min(maxloc, instrlen) 3207 while loc < maxloc and instring[loc] in bodychars: 3208 loc += 1 3209 3210 throwException = False 3211 if loc - start < self.minLen: 3212 throwException = True 3213 elif self.maxSpecified and loc < instrlen and instring[loc] in bodychars: 3214 throwException = True 3215 elif self.asKeyword: 3216 if (start > 0 and instring[start - 1] in bodychars 3217 or loc < instrlen and instring[loc] in bodychars): 3218 throwException = True 3219 3220 if throwException: 3221 raise ParseException(instring, loc, self.errmsg, self) 3222 3223 return loc, instring[start:loc] 3224 3225 def __str__(self): 3226 try: 3227 return super(Word, self).__str__() 3228 except Exception: 3229 pass 3230 3231 if self.strRepr is None: 3232 3233 def charsAsStr(s): 3234 if len(s) > 4: 3235 return s[:4] + "..." 3236 else: 3237 return s 3238 3239 if self.initCharsOrig != self.bodyCharsOrig: 3240 self.strRepr = "W:(%s, %s)" % (charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig)) 3241 else: 3242 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig) 3243 3244 return self.strRepr 3245 3246class _WordRegex(Word): 3247 def parseImpl(self, instring, loc, doActions=True): 3248 result = self.re_match(instring, loc) 3249 if not result: 3250 raise ParseException(instring, loc, self.errmsg, self) 3251 3252 loc = result.end() 3253 return loc, result.group() 3254 3255 3256class Char(_WordRegex): 3257 """A short-cut class for defining ``Word(characters, exact=1)``, 3258 when defining a match of any single character in a string of 3259 characters. 3260 """ 3261 def __init__(self, charset, asKeyword=False, excludeChars=None): 3262 super(Char, self).__init__(charset, exact=1, asKeyword=asKeyword, excludeChars=excludeChars) 3263 self.reString = "[%s]" % _escapeRegexRangeChars(''.join(self.initChars)) 3264 if asKeyword: 3265 self.reString = r"\b%s\b" % self.reString 3266 self.re = re.compile(self.reString) 3267 self.re_match = self.re.match 3268 3269 3270class Regex(Token): 3271 r"""Token for matching strings that match a given regular 3272 expression. Defined with string specifying the regular expression in 3273 a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_. 3274 If the given regex contains named groups (defined using ``(?P<name>...)``), 3275 these will be preserved as named parse results. 3276 3277 If instead of the Python stdlib re module you wish to use a different RE module 3278 (such as the `regex` module), you can replace it by either building your 3279 Regex object with a compiled RE that was compiled using regex: 3280 3281 Example:: 3282 3283 realnum = Regex(r"[+-]?\d+\.\d*") 3284 date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)') 3285 # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression 3286 roman = Regex(r"M{0,4}(CM|CD|D?{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})") 3287 3288 # use regex module instead of stdlib re module to construct a Regex using 3289 # a compiled regular expression 3290 import regex 3291 parser = pp.Regex(regex.compile(r'[0-9]')) 3292 3293 """ 3294 def __init__(self, pattern, flags=0, asGroupList=False, asMatch=False): 3295 """The parameters ``pattern`` and ``flags`` are passed 3296 to the ``re.compile()`` function as-is. See the Python 3297 `re module <https://docs.python.org/3/library/re.html>`_ module for an 3298 explanation of the acceptable patterns and flags. 3299 """ 3300 super(Regex, self).__init__() 3301 3302 if isinstance(pattern, basestring): 3303 if not pattern: 3304 warnings.warn("null string passed to Regex; use Empty() instead", 3305 SyntaxWarning, stacklevel=2) 3306 3307 self.pattern = pattern 3308 self.flags = flags 3309 3310 try: 3311 self.re = re.compile(self.pattern, self.flags) 3312 self.reString = self.pattern 3313 except sre_constants.error: 3314 warnings.warn("invalid pattern (%s) passed to Regex" % pattern, 3315 SyntaxWarning, stacklevel=2) 3316 raise 3317 3318 elif hasattr(pattern, 'pattern') and hasattr(pattern, 'match'): 3319 self.re = pattern 3320 self.pattern = self.reString = pattern.pattern 3321 self.flags = flags 3322 3323 else: 3324 raise TypeError("Regex may only be constructed with a string or a compiled RE object") 3325 3326 self.re_match = self.re.match 3327 3328 self.name = _ustr(self) 3329 self.errmsg = "Expected " + self.name 3330 self.mayIndexError = False 3331 self.mayReturnEmpty = self.re_match("") is not None 3332 self.asGroupList = asGroupList 3333 self.asMatch = asMatch 3334 if self.asGroupList: 3335 self.parseImpl = self.parseImplAsGroupList 3336 if self.asMatch: 3337 self.parseImpl = self.parseImplAsMatch 3338 3339 def parseImpl(self, instring, loc, doActions=True): 3340 result = self.re_match(instring, loc) 3341 if not result: 3342 raise ParseException(instring, loc, self.errmsg, self) 3343 3344 loc = result.end() 3345 ret = ParseResults(result.group()) 3346 d = result.groupdict() 3347 if d: 3348 for k, v in d.items(): 3349 ret[k] = v 3350 return loc, ret 3351 3352 def parseImplAsGroupList(self, instring, loc, doActions=True): 3353 result = self.re_match(instring, loc) 3354 if not result: 3355 raise ParseException(instring, loc, self.errmsg, self) 3356 3357 loc = result.end() 3358 ret = result.groups() 3359 return loc, ret 3360 3361 def parseImplAsMatch(self, instring, loc, doActions=True): 3362 result = self.re_match(instring, loc) 3363 if not result: 3364 raise ParseException(instring, loc, self.errmsg, self) 3365 3366 loc = result.end() 3367 ret = result 3368 return loc, ret 3369 3370 def __str__(self): 3371 try: 3372 return super(Regex, self).__str__() 3373 except Exception: 3374 pass 3375 3376 if self.strRepr is None: 3377 self.strRepr = "Re:(%s)" % repr(self.pattern) 3378 3379 return self.strRepr 3380 3381 def sub(self, repl): 3382 r""" 3383 Return Regex with an attached parse action to transform the parsed 3384 result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_. 3385 3386 Example:: 3387 3388 make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>") 3389 print(make_html.transformString("h1:main title:")) 3390 # prints "<h1>main title</h1>" 3391 """ 3392 if self.asGroupList: 3393 warnings.warn("cannot use sub() with Regex(asGroupList=True)", 3394 SyntaxWarning, stacklevel=2) 3395 raise SyntaxError() 3396 3397 if self.asMatch and callable(repl): 3398 warnings.warn("cannot use sub() with a callable with Regex(asMatch=True)", 3399 SyntaxWarning, stacklevel=2) 3400 raise SyntaxError() 3401 3402 if self.asMatch: 3403 def pa(tokens): 3404 return tokens[0].expand(repl) 3405 else: 3406 def pa(tokens): 3407 return self.re.sub(repl, tokens[0]) 3408 return self.addParseAction(pa) 3409 3410class QuotedString(Token): 3411 r""" 3412 Token for matching strings that are delimited by quoting characters. 3413 3414 Defined with the following parameters: 3415 3416 - quoteChar - string of one or more characters defining the 3417 quote delimiting string 3418 - escChar - character to escape quotes, typically backslash 3419 (default= ``None``) 3420 - escQuote - special quote sequence to escape an embedded quote 3421 string (such as SQL's ``""`` to escape an embedded ``"``) 3422 (default= ``None``) 3423 - multiline - boolean indicating whether quotes can span 3424 multiple lines (default= ``False``) 3425 - unquoteResults - boolean indicating whether the matched text 3426 should be unquoted (default= ``True``) 3427 - endQuoteChar - string of one or more characters defining the 3428 end of the quote delimited string (default= ``None`` => same as 3429 quoteChar) 3430 - convertWhitespaceEscapes - convert escaped whitespace 3431 (``'\t'``, ``'\n'``, etc.) to actual whitespace 3432 (default= ``True``) 3433 3434 Example:: 3435 3436 qs = QuotedString('"') 3437 print(qs.searchString('lsjdf "This is the quote" sldjf')) 3438 complex_qs = QuotedString('{{', endQuoteChar='}}') 3439 print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf')) 3440 sql_qs = QuotedString('"', escQuote='""') 3441 print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf')) 3442 3443 prints:: 3444 3445 [['This is the quote']] 3446 [['This is the "quote"']] 3447 [['This is the quote with "embedded" quotes']] 3448 """ 3449 def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, 3450 unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True): 3451 super(QuotedString, self).__init__() 3452 3453 # remove white space from quote chars - wont work anyway 3454 quoteChar = quoteChar.strip() 3455 if not quoteChar: 3456 warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2) 3457 raise SyntaxError() 3458 3459 if endQuoteChar is None: 3460 endQuoteChar = quoteChar 3461 else: 3462 endQuoteChar = endQuoteChar.strip() 3463 if not endQuoteChar: 3464 warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2) 3465 raise SyntaxError() 3466 3467 self.quoteChar = quoteChar 3468 self.quoteCharLen = len(quoteChar) 3469 self.firstQuoteChar = quoteChar[0] 3470 self.endQuoteChar = endQuoteChar 3471 self.endQuoteCharLen = len(endQuoteChar) 3472 self.escChar = escChar 3473 self.escQuote = escQuote 3474 self.unquoteResults = unquoteResults 3475 self.convertWhitespaceEscapes = convertWhitespaceEscapes 3476 3477 if multiline: 3478 self.flags = re.MULTILINE | re.DOTALL 3479 self.pattern = r'%s(?:[^%s%s]' % (re.escape(self.quoteChar), 3480 _escapeRegexRangeChars(self.endQuoteChar[0]), 3481 (escChar is not None and _escapeRegexRangeChars(escChar) or '')) 3482 else: 3483 self.flags = 0 3484 self.pattern = r'%s(?:[^%s\n\r%s]' % (re.escape(self.quoteChar), 3485 _escapeRegexRangeChars(self.endQuoteChar[0]), 3486 (escChar is not None and _escapeRegexRangeChars(escChar) or '')) 3487 if len(self.endQuoteChar) > 1: 3488 self.pattern += ( 3489 '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]), 3490 _escapeRegexRangeChars(self.endQuoteChar[i])) 3491 for i in range(len(self.endQuoteChar) - 1, 0, -1)) + ')') 3492 3493 if escQuote: 3494 self.pattern += (r'|(?:%s)' % re.escape(escQuote)) 3495 if escChar: 3496 self.pattern += (r'|(?:%s.)' % re.escape(escChar)) 3497 self.escCharReplacePattern = re.escape(self.escChar) + "(.)" 3498 self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) 3499 3500 try: 3501 self.re = re.compile(self.pattern, self.flags) 3502 self.reString = self.pattern 3503 self.re_match = self.re.match 3504 except sre_constants.error: 3505 warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, 3506 SyntaxWarning, stacklevel=2) 3507 raise 3508 3509 self.name = _ustr(self) 3510 self.errmsg = "Expected " + self.name 3511 self.mayIndexError = False 3512 self.mayReturnEmpty = True 3513 3514 def parseImpl(self, instring, loc, doActions=True): 3515 result = instring[loc] == self.firstQuoteChar and self.re_match(instring, loc) or None 3516 if not result: 3517 raise ParseException(instring, loc, self.errmsg, self) 3518 3519 loc = result.end() 3520 ret = result.group() 3521 3522 if self.unquoteResults: 3523 3524 # strip off quotes 3525 ret = ret[self.quoteCharLen: -self.endQuoteCharLen] 3526 3527 if isinstance(ret, basestring): 3528 # replace escaped whitespace 3529 if '\\' in ret and self.convertWhitespaceEscapes: 3530 ws_map = { 3531 r'\t': '\t', 3532 r'\n': '\n', 3533 r'\f': '\f', 3534 r'\r': '\r', 3535 } 3536 for wslit, wschar in ws_map.items(): 3537 ret = ret.replace(wslit, wschar) 3538 3539 # replace escaped characters 3540 if self.escChar: 3541 ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret) 3542 3543 # replace escaped quotes 3544 if self.escQuote: 3545 ret = ret.replace(self.escQuote, self.endQuoteChar) 3546 3547 return loc, ret 3548 3549 def __str__(self): 3550 try: 3551 return super(QuotedString, self).__str__() 3552 except Exception: 3553 pass 3554 3555 if self.strRepr is None: 3556 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar) 3557 3558 return self.strRepr 3559 3560 3561class CharsNotIn(Token): 3562 """Token for matching words composed of characters *not* in a given 3563 set (will include whitespace in matched characters if not listed in 3564 the provided exclusion set - see example). Defined with string 3565 containing all disallowed characters, and an optional minimum, 3566 maximum, and/or exact length. The default value for ``min`` is 3567 1 (a minimum value < 1 is not valid); the default values for 3568 ``max`` and ``exact`` are 0, meaning no maximum or exact 3569 length restriction. 3570 3571 Example:: 3572 3573 # define a comma-separated-value as anything that is not a ',' 3574 csv_value = CharsNotIn(',') 3575 print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213")) 3576 3577 prints:: 3578 3579 ['dkls', 'lsdkjf', 's12 34', '@!#', '213'] 3580 """ 3581 def __init__(self, notChars, min=1, max=0, exact=0): 3582 super(CharsNotIn, self).__init__() 3583 self.skipWhitespace = False 3584 self.notChars = notChars 3585 3586 if min < 1: 3587 raise ValueError("cannot specify a minimum length < 1; use " 3588 "Optional(CharsNotIn()) if zero-length char group is permitted") 3589 3590 self.minLen = min 3591 3592 if max > 0: 3593 self.maxLen = max 3594 else: 3595 self.maxLen = _MAX_INT 3596 3597 if exact > 0: 3598 self.maxLen = exact 3599 self.minLen = exact 3600 3601 self.name = _ustr(self) 3602 self.errmsg = "Expected " + self.name 3603 self.mayReturnEmpty = (self.minLen == 0) 3604 self.mayIndexError = False 3605 3606 def parseImpl(self, instring, loc, doActions=True): 3607 if instring[loc] in self.notChars: 3608 raise ParseException(instring, loc, self.errmsg, self) 3609 3610 start = loc 3611 loc += 1 3612 notchars = self.notChars 3613 maxlen = min(start + self.maxLen, len(instring)) 3614 while loc < maxlen and instring[loc] not in notchars: 3615 loc += 1 3616 3617 if loc - start < self.minLen: 3618 raise ParseException(instring, loc, self.errmsg, self) 3619 3620 return loc, instring[start:loc] 3621 3622 def __str__(self): 3623 try: 3624 return super(CharsNotIn, self).__str__() 3625 except Exception: 3626 pass 3627 3628 if self.strRepr is None: 3629 if len(self.notChars) > 4: 3630 self.strRepr = "!W:(%s...)" % self.notChars[:4] 3631 else: 3632 self.strRepr = "!W:(%s)" % self.notChars 3633 3634 return self.strRepr 3635 3636class White(Token): 3637 """Special matching class for matching whitespace. Normally, 3638 whitespace is ignored by pyparsing grammars. This class is included 3639 when some whitespace structures are significant. Define with 3640 a string containing the whitespace characters to be matched; default 3641 is ``" \\t\\r\\n"``. Also takes optional ``min``, 3642 ``max``, and ``exact`` arguments, as defined for the 3643 :class:`Word` class. 3644 """ 3645 whiteStrs = { 3646 ' ' : '<SP>', 3647 '\t': '<TAB>', 3648 '\n': '<LF>', 3649 '\r': '<CR>', 3650 '\f': '<FF>', 3651 u'\u00A0': '<NBSP>', 3652 u'\u1680': '<OGHAM_SPACE_MARK>', 3653 u'\u180E': '<MONGOLIAN_VOWEL_SEPARATOR>', 3654 u'\u2000': '<EN_QUAD>', 3655 u'\u2001': '<EM_QUAD>', 3656 u'\u2002': '<EN_SPACE>', 3657 u'\u2003': '<EM_SPACE>', 3658 u'\u2004': '<THREE-PER-EM_SPACE>', 3659 u'\u2005': '<FOUR-PER-EM_SPACE>', 3660 u'\u2006': '<SIX-PER-EM_SPACE>', 3661 u'\u2007': '<FIGURE_SPACE>', 3662 u'\u2008': '<PUNCTUATION_SPACE>', 3663 u'\u2009': '<THIN_SPACE>', 3664 u'\u200A': '<HAIR_SPACE>', 3665 u'\u200B': '<ZERO_WIDTH_SPACE>', 3666 u'\u202F': '<NNBSP>', 3667 u'\u205F': '<MMSP>', 3668 u'\u3000': '<IDEOGRAPHIC_SPACE>', 3669 } 3670 def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0): 3671 super(White, self).__init__() 3672 self.matchWhite = ws 3673 self.setWhitespaceChars("".join(c for c in self.whiteChars if c not in self.matchWhite)) 3674 # ~ self.leaveWhitespace() 3675 self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite)) 3676 self.mayReturnEmpty = True 3677 self.errmsg = "Expected " + self.name 3678 3679 self.minLen = min 3680 3681 if max > 0: 3682 self.maxLen = max 3683 else: 3684 self.maxLen = _MAX_INT 3685 3686 if exact > 0: 3687 self.maxLen = exact 3688 self.minLen = exact 3689 3690 def parseImpl(self, instring, loc, doActions=True): 3691 if instring[loc] not in self.matchWhite: 3692 raise ParseException(instring, loc, self.errmsg, self) 3693 start = loc 3694 loc += 1 3695 maxloc = start + self.maxLen 3696 maxloc = min(maxloc, len(instring)) 3697 while loc < maxloc and instring[loc] in self.matchWhite: 3698 loc += 1 3699 3700 if loc - start < self.minLen: 3701 raise ParseException(instring, loc, self.errmsg, self) 3702 3703 return loc, instring[start:loc] 3704 3705 3706class _PositionToken(Token): 3707 def __init__(self): 3708 super(_PositionToken, self).__init__() 3709 self.name = self.__class__.__name__ 3710 self.mayReturnEmpty = True 3711 self.mayIndexError = False 3712 3713class GoToColumn(_PositionToken): 3714 """Token to advance to a specific column of input text; useful for 3715 tabular report scraping. 3716 """ 3717 def __init__(self, colno): 3718 super(GoToColumn, self).__init__() 3719 self.col = colno 3720 3721 def preParse(self, instring, loc): 3722 if col(loc, instring) != self.col: 3723 instrlen = len(instring) 3724 if self.ignoreExprs: 3725 loc = self._skipIgnorables(instring, loc) 3726 while loc < instrlen and instring[loc].isspace() and col(loc, instring) != self.col: 3727 loc += 1 3728 return loc 3729 3730 def parseImpl(self, instring, loc, doActions=True): 3731 thiscol = col(loc, instring) 3732 if thiscol > self.col: 3733 raise ParseException(instring, loc, "Text not in expected column", self) 3734 newloc = loc + self.col - thiscol 3735 ret = instring[loc: newloc] 3736 return newloc, ret 3737 3738 3739class LineStart(_PositionToken): 3740 r"""Matches if current position is at the beginning of a line within 3741 the parse string 3742 3743 Example:: 3744 3745 test = '''\ 3746 AAA this line 3747 AAA and this line 3748 AAA but not this one 3749 B AAA and definitely not this one 3750 ''' 3751 3752 for t in (LineStart() + 'AAA' + restOfLine).searchString(test): 3753 print(t) 3754 3755 prints:: 3756 3757 ['AAA', ' this line'] 3758 ['AAA', ' and this line'] 3759 3760 """ 3761 def __init__(self): 3762 super(LineStart, self).__init__() 3763 self.errmsg = "Expected start of line" 3764 3765 def parseImpl(self, instring, loc, doActions=True): 3766 if col(loc, instring) == 1: 3767 return loc, [] 3768 raise ParseException(instring, loc, self.errmsg, self) 3769 3770class LineEnd(_PositionToken): 3771 """Matches if current position is at the end of a line within the 3772 parse string 3773 """ 3774 def __init__(self): 3775 super(LineEnd, self).__init__() 3776 self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "")) 3777 self.errmsg = "Expected end of line" 3778 3779 def parseImpl(self, instring, loc, doActions=True): 3780 if loc < len(instring): 3781 if instring[loc] == "\n": 3782 return loc + 1, "\n" 3783 else: 3784 raise ParseException(instring, loc, self.errmsg, self) 3785 elif loc == len(instring): 3786 return loc + 1, [] 3787 else: 3788 raise ParseException(instring, loc, self.errmsg, self) 3789 3790class StringStart(_PositionToken): 3791 """Matches if current position is at the beginning of the parse 3792 string 3793 """ 3794 def __init__(self): 3795 super(StringStart, self).__init__() 3796 self.errmsg = "Expected start of text" 3797 3798 def parseImpl(self, instring, loc, doActions=True): 3799 if loc != 0: 3800 # see if entire string up to here is just whitespace and ignoreables 3801 if loc != self.preParse(instring, 0): 3802 raise ParseException(instring, loc, self.errmsg, self) 3803 return loc, [] 3804 3805class StringEnd(_PositionToken): 3806 """Matches if current position is at the end of the parse string 3807 """ 3808 def __init__(self): 3809 super(StringEnd, self).__init__() 3810 self.errmsg = "Expected end of text" 3811 3812 def parseImpl(self, instring, loc, doActions=True): 3813 if loc < len(instring): 3814 raise ParseException(instring, loc, self.errmsg, self) 3815 elif loc == len(instring): 3816 return loc + 1, [] 3817 elif loc > len(instring): 3818 return loc, [] 3819 else: 3820 raise ParseException(instring, loc, self.errmsg, self) 3821 3822class WordStart(_PositionToken): 3823 """Matches if the current position is at the beginning of a Word, 3824 and is not preceded by any character in a given set of 3825 ``wordChars`` (default= ``printables``). To emulate the 3826 ``\b`` behavior of regular expressions, use 3827 ``WordStart(alphanums)``. ``WordStart`` will also match at 3828 the beginning of the string being parsed, or at the beginning of 3829 a line. 3830 """ 3831 def __init__(self, wordChars=printables): 3832 super(WordStart, self).__init__() 3833 self.wordChars = set(wordChars) 3834 self.errmsg = "Not at the start of a word" 3835 3836 def parseImpl(self, instring, loc, doActions=True): 3837 if loc != 0: 3838 if (instring[loc - 1] in self.wordChars 3839 or instring[loc] not in self.wordChars): 3840 raise ParseException(instring, loc, self.errmsg, self) 3841 return loc, [] 3842 3843class WordEnd(_PositionToken): 3844 """Matches if the current position is at the end of a Word, and is 3845 not followed by any character in a given set of ``wordChars`` 3846 (default= ``printables``). To emulate the ``\b`` behavior of 3847 regular expressions, use ``WordEnd(alphanums)``. ``WordEnd`` 3848 will also match at the end of the string being parsed, or at the end 3849 of a line. 3850 """ 3851 def __init__(self, wordChars=printables): 3852 super(WordEnd, self).__init__() 3853 self.wordChars = set(wordChars) 3854 self.skipWhitespace = False 3855 self.errmsg = "Not at the end of a word" 3856 3857 def parseImpl(self, instring, loc, doActions=True): 3858 instrlen = len(instring) 3859 if instrlen > 0 and loc < instrlen: 3860 if (instring[loc] in self.wordChars or 3861 instring[loc - 1] not in self.wordChars): 3862 raise ParseException(instring, loc, self.errmsg, self) 3863 return loc, [] 3864 3865 3866class ParseExpression(ParserElement): 3867 """Abstract subclass of ParserElement, for combining and 3868 post-processing parsed tokens. 3869 """ 3870 def __init__(self, exprs, savelist=False): 3871 super(ParseExpression, self).__init__(savelist) 3872 if isinstance(exprs, _generatorType): 3873 exprs = list(exprs) 3874 3875 if isinstance(exprs, basestring): 3876 self.exprs = [self._literalStringClass(exprs)] 3877 elif isinstance(exprs, ParserElement): 3878 self.exprs = [exprs] 3879 elif isinstance(exprs, Iterable): 3880 exprs = list(exprs) 3881 # if sequence of strings provided, wrap with Literal 3882 if any(isinstance(expr, basestring) for expr in exprs): 3883 exprs = (self._literalStringClass(e) if isinstance(e, basestring) else e for e in exprs) 3884 self.exprs = list(exprs) 3885 else: 3886 try: 3887 self.exprs = list(exprs) 3888 except TypeError: 3889 self.exprs = [exprs] 3890 self.callPreparse = False 3891 3892 def append(self, other): 3893 self.exprs.append(other) 3894 self.strRepr = None 3895 return self 3896 3897 def leaveWhitespace(self): 3898 """Extends ``leaveWhitespace`` defined in base class, and also invokes ``leaveWhitespace`` on 3899 all contained expressions.""" 3900 self.skipWhitespace = False 3901 self.exprs = [e.copy() for e in self.exprs] 3902 for e in self.exprs: 3903 e.leaveWhitespace() 3904 return self 3905 3906 def ignore(self, other): 3907 if isinstance(other, Suppress): 3908 if other not in self.ignoreExprs: 3909 super(ParseExpression, self).ignore(other) 3910 for e in self.exprs: 3911 e.ignore(self.ignoreExprs[-1]) 3912 else: 3913 super(ParseExpression, self).ignore(other) 3914 for e in self.exprs: 3915 e.ignore(self.ignoreExprs[-1]) 3916 return self 3917 3918 def __str__(self): 3919 try: 3920 return super(ParseExpression, self).__str__() 3921 except Exception: 3922 pass 3923 3924 if self.strRepr is None: 3925 self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs)) 3926 return self.strRepr 3927 3928 def streamline(self): 3929 super(ParseExpression, self).streamline() 3930 3931 for e in self.exprs: 3932 e.streamline() 3933 3934 # collapse nested And's of the form And(And(And(a, b), c), d) to And(a, b, c, d) 3935 # but only if there are no parse actions or resultsNames on the nested And's 3936 # (likewise for Or's and MatchFirst's) 3937 if len(self.exprs) == 2: 3938 other = self.exprs[0] 3939 if (isinstance(other, self.__class__) 3940 and not other.parseAction 3941 and other.resultsName is None 3942 and not other.debug): 3943 self.exprs = other.exprs[:] + [self.exprs[1]] 3944 self.strRepr = None 3945 self.mayReturnEmpty |= other.mayReturnEmpty 3946 self.mayIndexError |= other.mayIndexError 3947 3948 other = self.exprs[-1] 3949 if (isinstance(other, self.__class__) 3950 and not other.parseAction 3951 and other.resultsName is None 3952 and not other.debug): 3953 self.exprs = self.exprs[:-1] + other.exprs[:] 3954 self.strRepr = None 3955 self.mayReturnEmpty |= other.mayReturnEmpty 3956 self.mayIndexError |= other.mayIndexError 3957 3958 self.errmsg = "Expected " + _ustr(self) 3959 3960 return self 3961 3962 def validate(self, validateTrace=None): 3963 tmp = (validateTrace if validateTrace is not None else [])[:] + [self] 3964 for e in self.exprs: 3965 e.validate(tmp) 3966 self.checkRecursion([]) 3967 3968 def copy(self): 3969 ret = super(ParseExpression, self).copy() 3970 ret.exprs = [e.copy() for e in self.exprs] 3971 return ret 3972 3973 def _setResultsName(self, name, listAllMatches=False): 3974 if __diag__.warn_ungrouped_named_tokens_in_collection: 3975 for e in self.exprs: 3976 if isinstance(e, ParserElement) and e.resultsName: 3977 warnings.warn("{0}: setting results name {1!r} on {2} expression " 3978 "collides with {3!r} on contained expression".format("warn_ungrouped_named_tokens_in_collection", 3979 name, 3980 type(self).__name__, 3981 e.resultsName), 3982 stacklevel=3) 3983 3984 return super(ParseExpression, self)._setResultsName(name, listAllMatches) 3985 3986 3987class And(ParseExpression): 3988 """ 3989 Requires all given :class:`ParseExpression` s to be found in the given order. 3990 Expressions may be separated by whitespace. 3991 May be constructed using the ``'+'`` operator. 3992 May also be constructed using the ``'-'`` operator, which will 3993 suppress backtracking. 3994 3995 Example:: 3996 3997 integer = Word(nums) 3998 name_expr = OneOrMore(Word(alphas)) 3999 4000 expr = And([integer("id"), name_expr("name"), integer("age")]) 4001 # more easily written as: 4002 expr = integer("id") + name_expr("name") + integer("age") 4003 """ 4004 4005 class _ErrorStop(Empty): 4006 def __init__(self, *args, **kwargs): 4007 super(And._ErrorStop, self).__init__(*args, **kwargs) 4008 self.name = '-' 4009 self.leaveWhitespace() 4010 4011 def __init__(self, exprs, savelist=True): 4012 exprs = list(exprs) 4013 if exprs and Ellipsis in exprs: 4014 tmp = [] 4015 for i, expr in enumerate(exprs): 4016 if expr is Ellipsis: 4017 if i < len(exprs) - 1: 4018 skipto_arg = (Empty() + exprs[i + 1]).exprs[-1] 4019 tmp.append(SkipTo(skipto_arg)("_skipped*")) 4020 else: 4021 raise Exception("cannot construct And with sequence ending in ...") 4022 else: 4023 tmp.append(expr) 4024 exprs[:] = tmp 4025 super(And, self).__init__(exprs, savelist) 4026 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 4027 self.setWhitespaceChars(self.exprs[0].whiteChars) 4028 self.skipWhitespace = self.exprs[0].skipWhitespace 4029 self.callPreparse = True 4030 4031 def streamline(self): 4032 # collapse any _PendingSkip's 4033 if self.exprs: 4034 if any(isinstance(e, ParseExpression) and e.exprs and isinstance(e.exprs[-1], _PendingSkip) 4035 for e in self.exprs[:-1]): 4036 for i, e in enumerate(self.exprs[:-1]): 4037 if e is None: 4038 continue 4039 if (isinstance(e, ParseExpression) 4040 and e.exprs and isinstance(e.exprs[-1], _PendingSkip)): 4041 e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1] 4042 self.exprs[i + 1] = None 4043 self.exprs = [e for e in self.exprs if e is not None] 4044 4045 super(And, self).streamline() 4046 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 4047 return self 4048 4049 def parseImpl(self, instring, loc, doActions=True): 4050 # pass False as last arg to _parse for first element, since we already 4051 # pre-parsed the string as part of our And pre-parsing 4052 loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False) 4053 errorStop = False 4054 for e in self.exprs[1:]: 4055 if isinstance(e, And._ErrorStop): 4056 errorStop = True 4057 continue 4058 if errorStop: 4059 try: 4060 loc, exprtokens = e._parse(instring, loc, doActions) 4061 except ParseSyntaxException: 4062 raise 4063 except ParseBaseException as pe: 4064 pe.__traceback__ = None 4065 raise ParseSyntaxException._from_exception(pe) 4066 except IndexError: 4067 raise ParseSyntaxException(instring, len(instring), self.errmsg, self) 4068 else: 4069 loc, exprtokens = e._parse(instring, loc, doActions) 4070 if exprtokens or exprtokens.haskeys(): 4071 resultlist += exprtokens 4072 return loc, resultlist 4073 4074 def __iadd__(self, other): 4075 if isinstance(other, basestring): 4076 other = self._literalStringClass(other) 4077 return self.append(other) # And([self, other]) 4078 4079 def checkRecursion(self, parseElementList): 4080 subRecCheckList = parseElementList[:] + [self] 4081 for e in self.exprs: 4082 e.checkRecursion(subRecCheckList) 4083 if not e.mayReturnEmpty: 4084 break 4085 4086 def __str__(self): 4087 if hasattr(self, "name"): 4088 return self.name 4089 4090 if self.strRepr is None: 4091 self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}" 4092 4093 return self.strRepr 4094 4095 4096class Or(ParseExpression): 4097 """Requires that at least one :class:`ParseExpression` is found. If 4098 two expressions match, the expression that matches the longest 4099 string will be used. May be constructed using the ``'^'`` 4100 operator. 4101 4102 Example:: 4103 4104 # construct Or using '^' operator 4105 4106 number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums)) 4107 print(number.searchString("123 3.1416 789")) 4108 4109 prints:: 4110 4111 [['123'], ['3.1416'], ['789']] 4112 """ 4113 def __init__(self, exprs, savelist=False): 4114 super(Or, self).__init__(exprs, savelist) 4115 if self.exprs: 4116 self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) 4117 else: 4118 self.mayReturnEmpty = True 4119 4120 def streamline(self): 4121 super(Or, self).streamline() 4122 if __compat__.collect_all_And_tokens: 4123 self.saveAsList = any(e.saveAsList for e in self.exprs) 4124 return self 4125 4126 def parseImpl(self, instring, loc, doActions=True): 4127 maxExcLoc = -1 4128 maxException = None 4129 matches = [] 4130 for e in self.exprs: 4131 try: 4132 loc2 = e.tryParse(instring, loc) 4133 except ParseException as err: 4134 err.__traceback__ = None 4135 if err.loc > maxExcLoc: 4136 maxException = err 4137 maxExcLoc = err.loc 4138 except IndexError: 4139 if len(instring) > maxExcLoc: 4140 maxException = ParseException(instring, len(instring), e.errmsg, self) 4141 maxExcLoc = len(instring) 4142 else: 4143 # save match among all matches, to retry longest to shortest 4144 matches.append((loc2, e)) 4145 4146 if matches: 4147 # re-evaluate all matches in descending order of length of match, in case attached actions 4148 # might change whether or how much they match of the input. 4149 matches.sort(key=itemgetter(0), reverse=True) 4150 4151 if not doActions: 4152 # no further conditions or parse actions to change the selection of 4153 # alternative, so the first match will be the best match 4154 best_expr = matches[0][1] 4155 return best_expr._parse(instring, loc, doActions) 4156 4157 longest = -1, None 4158 for loc1, expr1 in matches: 4159 if loc1 <= longest[0]: 4160 # already have a longer match than this one will deliver, we are done 4161 return longest 4162 4163 try: 4164 loc2, toks = expr1._parse(instring, loc, doActions) 4165 except ParseException as err: 4166 err.__traceback__ = None 4167 if err.loc > maxExcLoc: 4168 maxException = err 4169 maxExcLoc = err.loc 4170 else: 4171 if loc2 >= loc1: 4172 return loc2, toks 4173 # didn't match as much as before 4174 elif loc2 > longest[0]: 4175 longest = loc2, toks 4176 4177 if longest != (-1, None): 4178 return longest 4179 4180 if maxException is not None: 4181 maxException.msg = self.errmsg 4182 raise maxException 4183 else: 4184 raise ParseException(instring, loc, "no defined alternatives to match", self) 4185 4186 4187 def __ixor__(self, other): 4188 if isinstance(other, basestring): 4189 other = self._literalStringClass(other) 4190 return self.append(other) # Or([self, other]) 4191 4192 def __str__(self): 4193 if hasattr(self, "name"): 4194 return self.name 4195 4196 if self.strRepr is None: 4197 self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}" 4198 4199 return self.strRepr 4200 4201 def checkRecursion(self, parseElementList): 4202 subRecCheckList = parseElementList[:] + [self] 4203 for e in self.exprs: 4204 e.checkRecursion(subRecCheckList) 4205 4206 def _setResultsName(self, name, listAllMatches=False): 4207 if (not __compat__.collect_all_And_tokens 4208 and __diag__.warn_multiple_tokens_in_named_alternation): 4209 if any(isinstance(e, And) for e in self.exprs): 4210 warnings.warn("{0}: setting results name {1!r} on {2} expression " 4211 "may only return a single token for an And alternative, " 4212 "in future will return the full list of tokens".format( 4213 "warn_multiple_tokens_in_named_alternation", name, type(self).__name__), 4214 stacklevel=3) 4215 4216 return super(Or, self)._setResultsName(name, listAllMatches) 4217 4218 4219class MatchFirst(ParseExpression): 4220 """Requires that at least one :class:`ParseExpression` is found. If 4221 two expressions match, the first one listed is the one that will 4222 match. May be constructed using the ``'|'`` operator. 4223 4224 Example:: 4225 4226 # construct MatchFirst using '|' operator 4227 4228 # watch the order of expressions to match 4229 number = Word(nums) | Combine(Word(nums) + '.' + Word(nums)) 4230 print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']] 4231 4232 # put more selective expression first 4233 number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums) 4234 print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']] 4235 """ 4236 def __init__(self, exprs, savelist=False): 4237 super(MatchFirst, self).__init__(exprs, savelist) 4238 if self.exprs: 4239 self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) 4240 else: 4241 self.mayReturnEmpty = True 4242 4243 def streamline(self): 4244 super(MatchFirst, self).streamline() 4245 if __compat__.collect_all_And_tokens: 4246 self.saveAsList = any(e.saveAsList for e in self.exprs) 4247 return self 4248 4249 def parseImpl(self, instring, loc, doActions=True): 4250 maxExcLoc = -1 4251 maxException = None 4252 for e in self.exprs: 4253 try: 4254 ret = e._parse(instring, loc, doActions) 4255 return ret 4256 except ParseException as err: 4257 if err.loc > maxExcLoc: 4258 maxException = err 4259 maxExcLoc = err.loc 4260 except IndexError: 4261 if len(instring) > maxExcLoc: 4262 maxException = ParseException(instring, len(instring), e.errmsg, self) 4263 maxExcLoc = len(instring) 4264 4265 # only got here if no expression matched, raise exception for match that made it the furthest 4266 else: 4267 if maxException is not None: 4268 maxException.msg = self.errmsg 4269 raise maxException 4270 else: 4271 raise ParseException(instring, loc, "no defined alternatives to match", self) 4272 4273 def __ior__(self, other): 4274 if isinstance(other, basestring): 4275 other = self._literalStringClass(other) 4276 return self.append(other) # MatchFirst([self, other]) 4277 4278 def __str__(self): 4279 if hasattr(self, "name"): 4280 return self.name 4281 4282 if self.strRepr is None: 4283 self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}" 4284 4285 return self.strRepr 4286 4287 def checkRecursion(self, parseElementList): 4288 subRecCheckList = parseElementList[:] + [self] 4289 for e in self.exprs: 4290 e.checkRecursion(subRecCheckList) 4291 4292 def _setResultsName(self, name, listAllMatches=False): 4293 if (not __compat__.collect_all_And_tokens 4294 and __diag__.warn_multiple_tokens_in_named_alternation): 4295 if any(isinstance(e, And) for e in self.exprs): 4296 warnings.warn("{0}: setting results name {1!r} on {2} expression " 4297 "may only return a single token for an And alternative, " 4298 "in future will return the full list of tokens".format( 4299 "warn_multiple_tokens_in_named_alternation", name, type(self).__name__), 4300 stacklevel=3) 4301 4302 return super(MatchFirst, self)._setResultsName(name, listAllMatches) 4303 4304 4305class Each(ParseExpression): 4306 """Requires all given :class:`ParseExpression` s to be found, but in 4307 any order. Expressions may be separated by whitespace. 4308 4309 May be constructed using the ``'&'`` operator. 4310 4311 Example:: 4312 4313 color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN") 4314 shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON") 4315 integer = Word(nums) 4316 shape_attr = "shape:" + shape_type("shape") 4317 posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn") 4318 color_attr = "color:" + color("color") 4319 size_attr = "size:" + integer("size") 4320 4321 # use Each (using operator '&') to accept attributes in any order 4322 # (shape and posn are required, color and size are optional) 4323 shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr) 4324 4325 shape_spec.runTests(''' 4326 shape: SQUARE color: BLACK posn: 100, 120 4327 shape: CIRCLE size: 50 color: BLUE posn: 50,80 4328 color:GREEN size:20 shape:TRIANGLE posn:20,40 4329 ''' 4330 ) 4331 4332 prints:: 4333 4334 shape: SQUARE color: BLACK posn: 100, 120 4335 ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']] 4336 - color: BLACK 4337 - posn: ['100', ',', '120'] 4338 - x: 100 4339 - y: 120 4340 - shape: SQUARE 4341 4342 4343 shape: CIRCLE size: 50 color: BLUE posn: 50,80 4344 ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']] 4345 - color: BLUE 4346 - posn: ['50', ',', '80'] 4347 - x: 50 4348 - y: 80 4349 - shape: CIRCLE 4350 - size: 50 4351 4352 4353 color: GREEN size: 20 shape: TRIANGLE posn: 20,40 4354 ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']] 4355 - color: GREEN 4356 - posn: ['20', ',', '40'] 4357 - x: 20 4358 - y: 40 4359 - shape: TRIANGLE 4360 - size: 20 4361 """ 4362 def __init__(self, exprs, savelist=True): 4363 super(Each, self).__init__(exprs, savelist) 4364 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 4365 self.skipWhitespace = True 4366 self.initExprGroups = True 4367 self.saveAsList = True 4368 4369 def streamline(self): 4370 super(Each, self).streamline() 4371 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 4372 return self 4373 4374 def parseImpl(self, instring, loc, doActions=True): 4375 if self.initExprGroups: 4376 self.opt1map = dict((id(e.expr), e) for e in self.exprs if isinstance(e, Optional)) 4377 opt1 = [e.expr for e in self.exprs if isinstance(e, Optional)] 4378 opt2 = [e for e in self.exprs if e.mayReturnEmpty and not isinstance(e, (Optional, Regex))] 4379 self.optionals = opt1 + opt2 4380 self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)] 4381 self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)] 4382 self.required = [e for e in self.exprs if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))] 4383 self.required += self.multirequired 4384 self.initExprGroups = False 4385 tmpLoc = loc 4386 tmpReqd = self.required[:] 4387 tmpOpt = self.optionals[:] 4388 matchOrder = [] 4389 4390 keepMatching = True 4391 while keepMatching: 4392 tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired 4393 failed = [] 4394 for e in tmpExprs: 4395 try: 4396 tmpLoc = e.tryParse(instring, tmpLoc) 4397 except ParseException: 4398 failed.append(e) 4399 else: 4400 matchOrder.append(self.opt1map.get(id(e), e)) 4401 if e in tmpReqd: 4402 tmpReqd.remove(e) 4403 elif e in tmpOpt: 4404 tmpOpt.remove(e) 4405 if len(failed) == len(tmpExprs): 4406 keepMatching = False 4407 4408 if tmpReqd: 4409 missing = ", ".join(_ustr(e) for e in tmpReqd) 4410 raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing) 4411 4412 # add any unmatched Optionals, in case they have default values defined 4413 matchOrder += [e for e in self.exprs if isinstance(e, Optional) and e.expr in tmpOpt] 4414 4415 resultlist = [] 4416 for e in matchOrder: 4417 loc, results = e._parse(instring, loc, doActions) 4418 resultlist.append(results) 4419 4420 finalResults = sum(resultlist, ParseResults([])) 4421 return loc, finalResults 4422 4423 def __str__(self): 4424 if hasattr(self, "name"): 4425 return self.name 4426 4427 if self.strRepr is None: 4428 self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}" 4429 4430 return self.strRepr 4431 4432 def checkRecursion(self, parseElementList): 4433 subRecCheckList = parseElementList[:] + [self] 4434 for e in self.exprs: 4435 e.checkRecursion(subRecCheckList) 4436 4437 4438class ParseElementEnhance(ParserElement): 4439 """Abstract subclass of :class:`ParserElement`, for combining and 4440 post-processing parsed tokens. 4441 """ 4442 def __init__(self, expr, savelist=False): 4443 super(ParseElementEnhance, self).__init__(savelist) 4444 if isinstance(expr, basestring): 4445 if issubclass(self._literalStringClass, Token): 4446 expr = self._literalStringClass(expr) 4447 else: 4448 expr = self._literalStringClass(Literal(expr)) 4449 self.expr = expr 4450 self.strRepr = None 4451 if expr is not None: 4452 self.mayIndexError = expr.mayIndexError 4453 self.mayReturnEmpty = expr.mayReturnEmpty 4454 self.setWhitespaceChars(expr.whiteChars) 4455 self.skipWhitespace = expr.skipWhitespace 4456 self.saveAsList = expr.saveAsList 4457 self.callPreparse = expr.callPreparse 4458 self.ignoreExprs.extend(expr.ignoreExprs) 4459 4460 def parseImpl(self, instring, loc, doActions=True): 4461 if self.expr is not None: 4462 return self.expr._parse(instring, loc, doActions, callPreParse=False) 4463 else: 4464 raise ParseException("", loc, self.errmsg, self) 4465 4466 def leaveWhitespace(self): 4467 self.skipWhitespace = False 4468 self.expr = self.expr.copy() 4469 if self.expr is not None: 4470 self.expr.leaveWhitespace() 4471 return self 4472 4473 def ignore(self, other): 4474 if isinstance(other, Suppress): 4475 if other not in self.ignoreExprs: 4476 super(ParseElementEnhance, self).ignore(other) 4477 if self.expr is not None: 4478 self.expr.ignore(self.ignoreExprs[-1]) 4479 else: 4480 super(ParseElementEnhance, self).ignore(other) 4481 if self.expr is not None: 4482 self.expr.ignore(self.ignoreExprs[-1]) 4483 return self 4484 4485 def streamline(self): 4486 super(ParseElementEnhance, self).streamline() 4487 if self.expr is not None: 4488 self.expr.streamline() 4489 return self 4490 4491 def checkRecursion(self, parseElementList): 4492 if self in parseElementList: 4493 raise RecursiveGrammarException(parseElementList + [self]) 4494 subRecCheckList = parseElementList[:] + [self] 4495 if self.expr is not None: 4496 self.expr.checkRecursion(subRecCheckList) 4497 4498 def validate(self, validateTrace=None): 4499 if validateTrace is None: 4500 validateTrace = [] 4501 tmp = validateTrace[:] + [self] 4502 if self.expr is not None: 4503 self.expr.validate(tmp) 4504 self.checkRecursion([]) 4505 4506 def __str__(self): 4507 try: 4508 return super(ParseElementEnhance, self).__str__() 4509 except Exception: 4510 pass 4511 4512 if self.strRepr is None and self.expr is not None: 4513 self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr)) 4514 return self.strRepr 4515 4516 4517class FollowedBy(ParseElementEnhance): 4518 """Lookahead matching of the given parse expression. 4519 ``FollowedBy`` does *not* advance the parsing position within 4520 the input string, it only verifies that the specified parse 4521 expression matches at the current position. ``FollowedBy`` 4522 always returns a null token list. If any results names are defined 4523 in the lookahead expression, those *will* be returned for access by 4524 name. 4525 4526 Example:: 4527 4528 # use FollowedBy to match a label only if it is followed by a ':' 4529 data_word = Word(alphas) 4530 label = data_word + FollowedBy(':') 4531 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) 4532 4533 OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint() 4534 4535 prints:: 4536 4537 [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']] 4538 """ 4539 def __init__(self, expr): 4540 super(FollowedBy, self).__init__(expr) 4541 self.mayReturnEmpty = True 4542 4543 def parseImpl(self, instring, loc, doActions=True): 4544 # by using self._expr.parse and deleting the contents of the returned ParseResults list 4545 # we keep any named results that were defined in the FollowedBy expression 4546 _, ret = self.expr._parse(instring, loc, doActions=doActions) 4547 del ret[:] 4548 4549 return loc, ret 4550 4551 4552class PrecededBy(ParseElementEnhance): 4553 """Lookbehind matching of the given parse expression. 4554 ``PrecededBy`` does not advance the parsing position within the 4555 input string, it only verifies that the specified parse expression 4556 matches prior to the current position. ``PrecededBy`` always 4557 returns a null token list, but if a results name is defined on the 4558 given expression, it is returned. 4559 4560 Parameters: 4561 4562 - expr - expression that must match prior to the current parse 4563 location 4564 - retreat - (default= ``None``) - (int) maximum number of characters 4565 to lookbehind prior to the current parse location 4566 4567 If the lookbehind expression is a string, Literal, Keyword, or 4568 a Word or CharsNotIn with a specified exact or maximum length, then 4569 the retreat parameter is not required. Otherwise, retreat must be 4570 specified to give a maximum number of characters to look back from 4571 the current parse position for a lookbehind match. 4572 4573 Example:: 4574 4575 # VB-style variable names with type prefixes 4576 int_var = PrecededBy("#") + pyparsing_common.identifier 4577 str_var = PrecededBy("$") + pyparsing_common.identifier 4578 4579 """ 4580 def __init__(self, expr, retreat=None): 4581 super(PrecededBy, self).__init__(expr) 4582 self.expr = self.expr().leaveWhitespace() 4583 self.mayReturnEmpty = True 4584 self.mayIndexError = False 4585 self.exact = False 4586 if isinstance(expr, str): 4587 retreat = len(expr) 4588 self.exact = True 4589 elif isinstance(expr, (Literal, Keyword)): 4590 retreat = expr.matchLen 4591 self.exact = True 4592 elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT: 4593 retreat = expr.maxLen 4594 self.exact = True 4595 elif isinstance(expr, _PositionToken): 4596 retreat = 0 4597 self.exact = True 4598 self.retreat = retreat 4599 self.errmsg = "not preceded by " + str(expr) 4600 self.skipWhitespace = False 4601 self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None))) 4602 4603 def parseImpl(self, instring, loc=0, doActions=True): 4604 if self.exact: 4605 if loc < self.retreat: 4606 raise ParseException(instring, loc, self.errmsg) 4607 start = loc - self.retreat 4608 _, ret = self.expr._parse(instring, start) 4609 else: 4610 # retreat specified a maximum lookbehind window, iterate 4611 test_expr = self.expr + StringEnd() 4612 instring_slice = instring[max(0, loc - self.retreat):loc] 4613 last_expr = ParseException(instring, loc, self.errmsg) 4614 for offset in range(1, min(loc, self.retreat + 1)+1): 4615 try: 4616 # print('trying', offset, instring_slice, repr(instring_slice[loc - offset:])) 4617 _, ret = test_expr._parse(instring_slice, len(instring_slice) - offset) 4618 except ParseBaseException as pbe: 4619 last_expr = pbe 4620 else: 4621 break 4622 else: 4623 raise last_expr 4624 return loc, ret 4625 4626 4627class NotAny(ParseElementEnhance): 4628 """Lookahead to disallow matching with the given parse expression. 4629 ``NotAny`` does *not* advance the parsing position within the 4630 input string, it only verifies that the specified parse expression 4631 does *not* match at the current position. Also, ``NotAny`` does 4632 *not* skip over leading whitespace. ``NotAny`` always returns 4633 a null token list. May be constructed using the '~' operator. 4634 4635 Example:: 4636 4637 AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split()) 4638 4639 # take care not to mistake keywords for identifiers 4640 ident = ~(AND | OR | NOT) + Word(alphas) 4641 boolean_term = Optional(NOT) + ident 4642 4643 # very crude boolean expression - to support parenthesis groups and 4644 # operation hierarchy, use infixNotation 4645 boolean_expr = boolean_term + ZeroOrMore((AND | OR) + boolean_term) 4646 4647 # integers that are followed by "." are actually floats 4648 integer = Word(nums) + ~Char(".") 4649 """ 4650 def __init__(self, expr): 4651 super(NotAny, self).__init__(expr) 4652 # ~ self.leaveWhitespace() 4653 self.skipWhitespace = False # do NOT use self.leaveWhitespace(), don't want to propagate to exprs 4654 self.mayReturnEmpty = True 4655 self.errmsg = "Found unwanted token, " + _ustr(self.expr) 4656 4657 def parseImpl(self, instring, loc, doActions=True): 4658 if self.expr.canParseNext(instring, loc): 4659 raise ParseException(instring, loc, self.errmsg, self) 4660 return loc, [] 4661 4662 def __str__(self): 4663 if hasattr(self, "name"): 4664 return self.name 4665 4666 if self.strRepr is None: 4667 self.strRepr = "~{" + _ustr(self.expr) + "}" 4668 4669 return self.strRepr 4670 4671class _MultipleMatch(ParseElementEnhance): 4672 def __init__(self, expr, stopOn=None): 4673 super(_MultipleMatch, self).__init__(expr) 4674 self.saveAsList = True 4675 ender = stopOn 4676 if isinstance(ender, basestring): 4677 ender = self._literalStringClass(ender) 4678 self.stopOn(ender) 4679 4680 def stopOn(self, ender): 4681 if isinstance(ender, basestring): 4682 ender = self._literalStringClass(ender) 4683 self.not_ender = ~ender if ender is not None else None 4684 return self 4685 4686 def parseImpl(self, instring, loc, doActions=True): 4687 self_expr_parse = self.expr._parse 4688 self_skip_ignorables = self._skipIgnorables 4689 check_ender = self.not_ender is not None 4690 if check_ender: 4691 try_not_ender = self.not_ender.tryParse 4692 4693 # must be at least one (but first see if we are the stopOn sentinel; 4694 # if so, fail) 4695 if check_ender: 4696 try_not_ender(instring, loc) 4697 loc, tokens = self_expr_parse(instring, loc, doActions, callPreParse=False) 4698 try: 4699 hasIgnoreExprs = (not not self.ignoreExprs) 4700 while 1: 4701 if check_ender: 4702 try_not_ender(instring, loc) 4703 if hasIgnoreExprs: 4704 preloc = self_skip_ignorables(instring, loc) 4705 else: 4706 preloc = loc 4707 loc, tmptokens = self_expr_parse(instring, preloc, doActions) 4708 if tmptokens or tmptokens.haskeys(): 4709 tokens += tmptokens 4710 except (ParseException, IndexError): 4711 pass 4712 4713 return loc, tokens 4714 4715 def _setResultsName(self, name, listAllMatches=False): 4716 if __diag__.warn_ungrouped_named_tokens_in_collection: 4717 for e in [self.expr] + getattr(self.expr, 'exprs', []): 4718 if isinstance(e, ParserElement) and e.resultsName: 4719 warnings.warn("{0}: setting results name {1!r} on {2} expression " 4720 "collides with {3!r} on contained expression".format("warn_ungrouped_named_tokens_in_collection", 4721 name, 4722 type(self).__name__, 4723 e.resultsName), 4724 stacklevel=3) 4725 4726 return super(_MultipleMatch, self)._setResultsName(name, listAllMatches) 4727 4728 4729class OneOrMore(_MultipleMatch): 4730 """Repetition of one or more of the given expression. 4731 4732 Parameters: 4733 - expr - expression that must match one or more times 4734 - stopOn - (default= ``None``) - expression for a terminating sentinel 4735 (only required if the sentinel would ordinarily match the repetition 4736 expression) 4737 4738 Example:: 4739 4740 data_word = Word(alphas) 4741 label = data_word + FollowedBy(':') 4742 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join)) 4743 4744 text = "shape: SQUARE posn: upper left color: BLACK" 4745 OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']] 4746 4747 # use stopOn attribute for OneOrMore to avoid reading label string as part of the data 4748 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) 4749 OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']] 4750 4751 # could also be written as 4752 (attr_expr * (1,)).parseString(text).pprint() 4753 """ 4754 4755 def __str__(self): 4756 if hasattr(self, "name"): 4757 return self.name 4758 4759 if self.strRepr is None: 4760 self.strRepr = "{" + _ustr(self.expr) + "}..." 4761 4762 return self.strRepr 4763 4764class ZeroOrMore(_MultipleMatch): 4765 """Optional repetition of zero or more of the given expression. 4766 4767 Parameters: 4768 - expr - expression that must match zero or more times 4769 - stopOn - (default= ``None``) - expression for a terminating sentinel 4770 (only required if the sentinel would ordinarily match the repetition 4771 expression) 4772 4773 Example: similar to :class:`OneOrMore` 4774 """ 4775 def __init__(self, expr, stopOn=None): 4776 super(ZeroOrMore, self).__init__(expr, stopOn=stopOn) 4777 self.mayReturnEmpty = True 4778 4779 def parseImpl(self, instring, loc, doActions=True): 4780 try: 4781 return super(ZeroOrMore, self).parseImpl(instring, loc, doActions) 4782 except (ParseException, IndexError): 4783 return loc, [] 4784 4785 def __str__(self): 4786 if hasattr(self, "name"): 4787 return self.name 4788 4789 if self.strRepr is None: 4790 self.strRepr = "[" + _ustr(self.expr) + "]..." 4791 4792 return self.strRepr 4793 4794 4795class _NullToken(object): 4796 def __bool__(self): 4797 return False 4798 __nonzero__ = __bool__ 4799 def __str__(self): 4800 return "" 4801 4802class Optional(ParseElementEnhance): 4803 """Optional matching of the given expression. 4804 4805 Parameters: 4806 - expr - expression that must match zero or more times 4807 - default (optional) - value to be returned if the optional expression is not found. 4808 4809 Example:: 4810 4811 # US postal code can be a 5-digit zip, plus optional 4-digit qualifier 4812 zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4))) 4813 zip.runTests(''' 4814 # traditional ZIP code 4815 12345 4816 4817 # ZIP+4 form 4818 12101-0001 4819 4820 # invalid ZIP 4821 98765- 4822 ''') 4823 4824 prints:: 4825 4826 # traditional ZIP code 4827 12345 4828 ['12345'] 4829 4830 # ZIP+4 form 4831 12101-0001 4832 ['12101-0001'] 4833 4834 # invalid ZIP 4835 98765- 4836 ^ 4837 FAIL: Expected end of text (at char 5), (line:1, col:6) 4838 """ 4839 __optionalNotMatched = _NullToken() 4840 4841 def __init__(self, expr, default=__optionalNotMatched): 4842 super(Optional, self).__init__(expr, savelist=False) 4843 self.saveAsList = self.expr.saveAsList 4844 self.defaultValue = default 4845 self.mayReturnEmpty = True 4846 4847 def parseImpl(self, instring, loc, doActions=True): 4848 try: 4849 loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False) 4850 except (ParseException, IndexError): 4851 if self.defaultValue is not self.__optionalNotMatched: 4852 if self.expr.resultsName: 4853 tokens = ParseResults([self.defaultValue]) 4854 tokens[self.expr.resultsName] = self.defaultValue 4855 else: 4856 tokens = [self.defaultValue] 4857 else: 4858 tokens = [] 4859 return loc, tokens 4860 4861 def __str__(self): 4862 if hasattr(self, "name"): 4863 return self.name 4864 4865 if self.strRepr is None: 4866 self.strRepr = "[" + _ustr(self.expr) + "]" 4867 4868 return self.strRepr 4869 4870class SkipTo(ParseElementEnhance): 4871 """Token for skipping over all undefined text until the matched 4872 expression is found. 4873 4874 Parameters: 4875 - expr - target expression marking the end of the data to be skipped 4876 - include - (default= ``False``) if True, the target expression is also parsed 4877 (the skipped text and target expression are returned as a 2-element list). 4878 - ignore - (default= ``None``) used to define grammars (typically quoted strings and 4879 comments) that might contain false matches to the target expression 4880 - failOn - (default= ``None``) define expressions that are not allowed to be 4881 included in the skipped test; if found before the target expression is found, 4882 the SkipTo is not a match 4883 4884 Example:: 4885 4886 report = ''' 4887 Outstanding Issues Report - 1 Jan 2000 4888 4889 # | Severity | Description | Days Open 4890 -----+----------+-------------------------------------------+----------- 4891 101 | Critical | Intermittent system crash | 6 4892 94 | Cosmetic | Spelling error on Login ('log|n') | 14 4893 79 | Minor | System slow when running too many reports | 47 4894 ''' 4895 integer = Word(nums) 4896 SEP = Suppress('|') 4897 # use SkipTo to simply match everything up until the next SEP 4898 # - ignore quoted strings, so that a '|' character inside a quoted string does not match 4899 # - parse action will call token.strip() for each matched token, i.e., the description body 4900 string_data = SkipTo(SEP, ignore=quotedString) 4901 string_data.setParseAction(tokenMap(str.strip)) 4902 ticket_expr = (integer("issue_num") + SEP 4903 + string_data("sev") + SEP 4904 + string_data("desc") + SEP 4905 + integer("days_open")) 4906 4907 for tkt in ticket_expr.searchString(report): 4908 print tkt.dump() 4909 4910 prints:: 4911 4912 ['101', 'Critical', 'Intermittent system crash', '6'] 4913 - days_open: 6 4914 - desc: Intermittent system crash 4915 - issue_num: 101 4916 - sev: Critical 4917 ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14'] 4918 - days_open: 14 4919 - desc: Spelling error on Login ('log|n') 4920 - issue_num: 94 4921 - sev: Cosmetic 4922 ['79', 'Minor', 'System slow when running too many reports', '47'] 4923 - days_open: 47 4924 - desc: System slow when running too many reports 4925 - issue_num: 79 4926 - sev: Minor 4927 """ 4928 def __init__(self, other, include=False, ignore=None, failOn=None): 4929 super(SkipTo, self).__init__(other) 4930 self.ignoreExpr = ignore 4931 self.mayReturnEmpty = True 4932 self.mayIndexError = False 4933 self.includeMatch = include 4934 self.saveAsList = False 4935 if isinstance(failOn, basestring): 4936 self.failOn = self._literalStringClass(failOn) 4937 else: 4938 self.failOn = failOn 4939 self.errmsg = "No match found for " + _ustr(self.expr) 4940 4941 def parseImpl(self, instring, loc, doActions=True): 4942 startloc = loc 4943 instrlen = len(instring) 4944 expr = self.expr 4945 expr_parse = self.expr._parse 4946 self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None 4947 self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None 4948 4949 tmploc = loc 4950 while tmploc <= instrlen: 4951 if self_failOn_canParseNext is not None: 4952 # break if failOn expression matches 4953 if self_failOn_canParseNext(instring, tmploc): 4954 break 4955 4956 if self_ignoreExpr_tryParse is not None: 4957 # advance past ignore expressions 4958 while 1: 4959 try: 4960 tmploc = self_ignoreExpr_tryParse(instring, tmploc) 4961 except ParseBaseException: 4962 break 4963 4964 try: 4965 expr_parse(instring, tmploc, doActions=False, callPreParse=False) 4966 except (ParseException, IndexError): 4967 # no match, advance loc in string 4968 tmploc += 1 4969 else: 4970 # matched skipto expr, done 4971 break 4972 4973 else: 4974 # ran off the end of the input string without matching skipto expr, fail 4975 raise ParseException(instring, loc, self.errmsg, self) 4976 4977 # build up return values 4978 loc = tmploc 4979 skiptext = instring[startloc:loc] 4980 skipresult = ParseResults(skiptext) 4981 4982 if self.includeMatch: 4983 loc, mat = expr_parse(instring, loc, doActions, callPreParse=False) 4984 skipresult += mat 4985 4986 return loc, skipresult 4987 4988class Forward(ParseElementEnhance): 4989 """Forward declaration of an expression to be defined later - 4990 used for recursive grammars, such as algebraic infix notation. 4991 When the expression is known, it is assigned to the ``Forward`` 4992 variable using the '<<' operator. 4993 4994 Note: take care when assigning to ``Forward`` not to overlook 4995 precedence of operators. 4996 4997 Specifically, '|' has a lower precedence than '<<', so that:: 4998 4999 fwdExpr << a | b | c 5000 5001 will actually be evaluated as:: 5002 5003 (fwdExpr << a) | b | c 5004 5005 thereby leaving b and c out as parseable alternatives. It is recommended that you 5006 explicitly group the values inserted into the ``Forward``:: 5007 5008 fwdExpr << (a | b | c) 5009 5010 Converting to use the '<<=' operator instead will avoid this problem. 5011 5012 See :class:`ParseResults.pprint` for an example of a recursive 5013 parser created using ``Forward``. 5014 """ 5015 def __init__(self, other=None): 5016 super(Forward, self).__init__(other, savelist=False) 5017 5018 def __lshift__(self, other): 5019 if isinstance(other, basestring): 5020 other = self._literalStringClass(other) 5021 self.expr = other 5022 self.strRepr = None 5023 self.mayIndexError = self.expr.mayIndexError 5024 self.mayReturnEmpty = self.expr.mayReturnEmpty 5025 self.setWhitespaceChars(self.expr.whiteChars) 5026 self.skipWhitespace = self.expr.skipWhitespace 5027 self.saveAsList = self.expr.saveAsList 5028 self.ignoreExprs.extend(self.expr.ignoreExprs) 5029 return self 5030 5031 def __ilshift__(self, other): 5032 return self << other 5033 5034 def leaveWhitespace(self): 5035 self.skipWhitespace = False 5036 return self 5037 5038 def streamline(self): 5039 if not self.streamlined: 5040 self.streamlined = True 5041 if self.expr is not None: 5042 self.expr.streamline() 5043 return self 5044 5045 def validate(self, validateTrace=None): 5046 if validateTrace is None: 5047 validateTrace = [] 5048 5049 if self not in validateTrace: 5050 tmp = validateTrace[:] + [self] 5051 if self.expr is not None: 5052 self.expr.validate(tmp) 5053 self.checkRecursion([]) 5054 5055 def __str__(self): 5056 if hasattr(self, "name"): 5057 return self.name 5058 if self.strRepr is not None: 5059 return self.strRepr 5060 5061 # Avoid infinite recursion by setting a temporary strRepr 5062 self.strRepr = ": ..." 5063 5064 # Use the string representation of main expression. 5065 retString = '...' 5066 try: 5067 if self.expr is not None: 5068 retString = _ustr(self.expr)[:1000] 5069 else: 5070 retString = "None" 5071 finally: 5072 self.strRepr = self.__class__.__name__ + ": " + retString 5073 return self.strRepr 5074 5075 def copy(self): 5076 if self.expr is not None: 5077 return super(Forward, self).copy() 5078 else: 5079 ret = Forward() 5080 ret <<= self 5081 return ret 5082 5083 def _setResultsName(self, name, listAllMatches=False): 5084 if __diag__.warn_name_set_on_empty_Forward: 5085 if self.expr is None: 5086 warnings.warn("{0}: setting results name {0!r} on {1} expression " 5087 "that has no contained expression".format("warn_name_set_on_empty_Forward", 5088 name, 5089 type(self).__name__), 5090 stacklevel=3) 5091 5092 return super(Forward, self)._setResultsName(name, listAllMatches) 5093 5094class TokenConverter(ParseElementEnhance): 5095 """ 5096 Abstract subclass of :class:`ParseExpression`, for converting parsed results. 5097 """ 5098 def __init__(self, expr, savelist=False): 5099 super(TokenConverter, self).__init__(expr) # , savelist) 5100 self.saveAsList = False 5101 5102class Combine(TokenConverter): 5103 """Converter to concatenate all matching tokens to a single string. 5104 By default, the matching patterns must also be contiguous in the 5105 input string; this can be disabled by specifying 5106 ``'adjacent=False'`` in the constructor. 5107 5108 Example:: 5109 5110 real = Word(nums) + '.' + Word(nums) 5111 print(real.parseString('3.1416')) # -> ['3', '.', '1416'] 5112 # will also erroneously match the following 5113 print(real.parseString('3. 1416')) # -> ['3', '.', '1416'] 5114 5115 real = Combine(Word(nums) + '.' + Word(nums)) 5116 print(real.parseString('3.1416')) # -> ['3.1416'] 5117 # no match when there are internal spaces 5118 print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...) 5119 """ 5120 def __init__(self, expr, joinString="", adjacent=True): 5121 super(Combine, self).__init__(expr) 5122 # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself 5123 if adjacent: 5124 self.leaveWhitespace() 5125 self.adjacent = adjacent 5126 self.skipWhitespace = True 5127 self.joinString = joinString 5128 self.callPreparse = True 5129 5130 def ignore(self, other): 5131 if self.adjacent: 5132 ParserElement.ignore(self, other) 5133 else: 5134 super(Combine, self).ignore(other) 5135 return self 5136 5137 def postParse(self, instring, loc, tokenlist): 5138 retToks = tokenlist.copy() 5139 del retToks[:] 5140 retToks += ParseResults(["".join(tokenlist._asStringList(self.joinString))], modal=self.modalResults) 5141 5142 if self.resultsName and retToks.haskeys(): 5143 return [retToks] 5144 else: 5145 return retToks 5146 5147class Group(TokenConverter): 5148 """Converter to return the matched tokens as a list - useful for 5149 returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions. 5150 5151 Example:: 5152 5153 ident = Word(alphas) 5154 num = Word(nums) 5155 term = ident | num 5156 func = ident + Optional(delimitedList(term)) 5157 print(func.parseString("fn a, b, 100")) # -> ['fn', 'a', 'b', '100'] 5158 5159 func = ident + Group(Optional(delimitedList(term))) 5160 print(func.parseString("fn a, b, 100")) # -> ['fn', ['a', 'b', '100']] 5161 """ 5162 def __init__(self, expr): 5163 super(Group, self).__init__(expr) 5164 self.saveAsList = True 5165 5166 def postParse(self, instring, loc, tokenlist): 5167 return [tokenlist] 5168 5169class Dict(TokenConverter): 5170 """Converter to return a repetitive expression as a list, but also 5171 as a dictionary. Each element can also be referenced using the first 5172 token in the expression as its key. Useful for tabular report 5173 scraping when the first column can be used as a item key. 5174 5175 Example:: 5176 5177 data_word = Word(alphas) 5178 label = data_word + FollowedBy(':') 5179 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join)) 5180 5181 text = "shape: SQUARE posn: upper left color: light blue texture: burlap" 5182 attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) 5183 5184 # print attributes as plain groups 5185 print(OneOrMore(attr_expr).parseString(text).dump()) 5186 5187 # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names 5188 result = Dict(OneOrMore(Group(attr_expr))).parseString(text) 5189 print(result.dump()) 5190 5191 # access named fields as dict entries, or output as dict 5192 print(result['shape']) 5193 print(result.asDict()) 5194 5195 prints:: 5196 5197 ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap'] 5198 [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] 5199 - color: light blue 5200 - posn: upper left 5201 - shape: SQUARE 5202 - texture: burlap 5203 SQUARE 5204 {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'} 5205 5206 See more examples at :class:`ParseResults` of accessing fields by results name. 5207 """ 5208 def __init__(self, expr): 5209 super(Dict, self).__init__(expr) 5210 self.saveAsList = True 5211 5212 def postParse(self, instring, loc, tokenlist): 5213 for i, tok in enumerate(tokenlist): 5214 if len(tok) == 0: 5215 continue 5216 ikey = tok[0] 5217 if isinstance(ikey, int): 5218 ikey = _ustr(tok[0]).strip() 5219 if len(tok) == 1: 5220 tokenlist[ikey] = _ParseResultsWithOffset("", i) 5221 elif len(tok) == 2 and not isinstance(tok[1], ParseResults): 5222 tokenlist[ikey] = _ParseResultsWithOffset(tok[1], i) 5223 else: 5224 dictvalue = tok.copy() # ParseResults(i) 5225 del dictvalue[0] 5226 if len(dictvalue) != 1 or (isinstance(dictvalue, ParseResults) and dictvalue.haskeys()): 5227 tokenlist[ikey] = _ParseResultsWithOffset(dictvalue, i) 5228 else: 5229 tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0], i) 5230 5231 if self.resultsName: 5232 return [tokenlist] 5233 else: 5234 return tokenlist 5235 5236 5237class Suppress(TokenConverter): 5238 """Converter for ignoring the results of a parsed expression. 5239 5240 Example:: 5241 5242 source = "a, b, c,d" 5243 wd = Word(alphas) 5244 wd_list1 = wd + ZeroOrMore(',' + wd) 5245 print(wd_list1.parseString(source)) 5246 5247 # often, delimiters that are useful during parsing are just in the 5248 # way afterward - use Suppress to keep them out of the parsed output 5249 wd_list2 = wd + ZeroOrMore(Suppress(',') + wd) 5250 print(wd_list2.parseString(source)) 5251 5252 prints:: 5253 5254 ['a', ',', 'b', ',', 'c', ',', 'd'] 5255 ['a', 'b', 'c', 'd'] 5256 5257 (See also :class:`delimitedList`.) 5258 """ 5259 def postParse(self, instring, loc, tokenlist): 5260 return [] 5261 5262 def suppress(self): 5263 return self 5264 5265 5266class OnlyOnce(object): 5267 """Wrapper for parse actions, to ensure they are only called once. 5268 """ 5269 def __init__(self, methodCall): 5270 self.callable = _trim_arity(methodCall) 5271 self.called = False 5272 def __call__(self, s, l, t): 5273 if not self.called: 5274 results = self.callable(s, l, t) 5275 self.called = True 5276 return results 5277 raise ParseException(s, l, "") 5278 def reset(self): 5279 self.called = False 5280 5281def traceParseAction(f): 5282 """Decorator for debugging parse actions. 5283 5284 When the parse action is called, this decorator will print 5285 ``">> entering method-name(line:<current_source_line>, <parse_location>, <matched_tokens>)"``. 5286 When the parse action completes, the decorator will print 5287 ``"<<"`` followed by the returned value, or any exception that the parse action raised. 5288 5289 Example:: 5290 5291 wd = Word(alphas) 5292 5293 @traceParseAction 5294 def remove_duplicate_chars(tokens): 5295 return ''.join(sorted(set(''.join(tokens)))) 5296 5297 wds = OneOrMore(wd).setParseAction(remove_duplicate_chars) 5298 print(wds.parseString("slkdjs sld sldd sdlf sdljf")) 5299 5300 prints:: 5301 5302 >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {})) 5303 <<leaving remove_duplicate_chars (ret: 'dfjkls') 5304 ['dfjkls'] 5305 """ 5306 f = _trim_arity(f) 5307 def z(*paArgs): 5308 thisFunc = f.__name__ 5309 s, l, t = paArgs[-3:] 5310 if len(paArgs) > 3: 5311 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc 5312 sys.stderr.write(">>entering %s(line: '%s', %d, %r)\n" % (thisFunc, line(l, s), l, t)) 5313 try: 5314 ret = f(*paArgs) 5315 except Exception as exc: 5316 sys.stderr.write("<<leaving %s (exception: %s)\n" % (thisFunc, exc)) 5317 raise 5318 sys.stderr.write("<<leaving %s (ret: %r)\n" % (thisFunc, ret)) 5319 return ret 5320 try: 5321 z.__name__ = f.__name__ 5322 except AttributeError: 5323 pass 5324 return z 5325 5326# 5327# global helpers 5328# 5329def delimitedList(expr, delim=",", combine=False): 5330 """Helper to define a delimited list of expressions - the delimiter 5331 defaults to ','. By default, the list elements and delimiters can 5332 have intervening whitespace, and comments, but this can be 5333 overridden by passing ``combine=True`` in the constructor. If 5334 ``combine`` is set to ``True``, the matching tokens are 5335 returned as a single token string, with the delimiters included; 5336 otherwise, the matching tokens are returned as a list of tokens, 5337 with the delimiters suppressed. 5338 5339 Example:: 5340 5341 delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc'] 5342 delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE'] 5343 """ 5344 dlName = _ustr(expr) + " [" + _ustr(delim) + " " + _ustr(expr) + "]..." 5345 if combine: 5346 return Combine(expr + ZeroOrMore(delim + expr)).setName(dlName) 5347 else: 5348 return (expr + ZeroOrMore(Suppress(delim) + expr)).setName(dlName) 5349 5350def countedArray(expr, intExpr=None): 5351 """Helper to define a counted list of expressions. 5352 5353 This helper defines a pattern of the form:: 5354 5355 integer expr expr expr... 5356 5357 where the leading integer tells how many expr expressions follow. 5358 The matched tokens returns the array of expr tokens as a list - the 5359 leading count token is suppressed. 5360 5361 If ``intExpr`` is specified, it should be a pyparsing expression 5362 that produces an integer value. 5363 5364 Example:: 5365 5366 countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd'] 5367 5368 # in this parser, the leading integer value is given in binary, 5369 # '10' indicating that 2 values are in the array 5370 binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2)) 5371 countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef') # -> ['ab', 'cd'] 5372 """ 5373 arrayExpr = Forward() 5374 def countFieldParseAction(s, l, t): 5375 n = t[0] 5376 arrayExpr << (n and Group(And([expr] * n)) or Group(empty)) 5377 return [] 5378 if intExpr is None: 5379 intExpr = Word(nums).setParseAction(lambda t: int(t[0])) 5380 else: 5381 intExpr = intExpr.copy() 5382 intExpr.setName("arrayLen") 5383 intExpr.addParseAction(countFieldParseAction, callDuringTry=True) 5384 return (intExpr + arrayExpr).setName('(len) ' + _ustr(expr) + '...') 5385 5386def _flatten(L): 5387 ret = [] 5388 for i in L: 5389 if isinstance(i, list): 5390 ret.extend(_flatten(i)) 5391 else: 5392 ret.append(i) 5393 return ret 5394 5395def matchPreviousLiteral(expr): 5396 """Helper to define an expression that is indirectly defined from 5397 the tokens matched in a previous expression, that is, it looks for 5398 a 'repeat' of a previous expression. For example:: 5399 5400 first = Word(nums) 5401 second = matchPreviousLiteral(first) 5402 matchExpr = first + ":" + second 5403 5404 will match ``"1:1"``, but not ``"1:2"``. Because this 5405 matches a previous literal, will also match the leading 5406 ``"1:1"`` in ``"1:10"``. If this is not desired, use 5407 :class:`matchPreviousExpr`. Do *not* use with packrat parsing 5408 enabled. 5409 """ 5410 rep = Forward() 5411 def copyTokenToRepeater(s, l, t): 5412 if t: 5413 if len(t) == 1: 5414 rep << t[0] 5415 else: 5416 # flatten t tokens 5417 tflat = _flatten(t.asList()) 5418 rep << And(Literal(tt) for tt in tflat) 5419 else: 5420 rep << Empty() 5421 expr.addParseAction(copyTokenToRepeater, callDuringTry=True) 5422 rep.setName('(prev) ' + _ustr(expr)) 5423 return rep 5424 5425def matchPreviousExpr(expr): 5426 """Helper to define an expression that is indirectly defined from 5427 the tokens matched in a previous expression, that is, it looks for 5428 a 'repeat' of a previous expression. For example:: 5429 5430 first = Word(nums) 5431 second = matchPreviousExpr(first) 5432 matchExpr = first + ":" + second 5433 5434 will match ``"1:1"``, but not ``"1:2"``. Because this 5435 matches by expressions, will *not* match the leading ``"1:1"`` 5436 in ``"1:10"``; the expressions are evaluated first, and then 5437 compared, so ``"1"`` is compared with ``"10"``. Do *not* use 5438 with packrat parsing enabled. 5439 """ 5440 rep = Forward() 5441 e2 = expr.copy() 5442 rep <<= e2 5443 def copyTokenToRepeater(s, l, t): 5444 matchTokens = _flatten(t.asList()) 5445 def mustMatchTheseTokens(s, l, t): 5446 theseTokens = _flatten(t.asList()) 5447 if theseTokens != matchTokens: 5448 raise ParseException('', 0, '') 5449 rep.setParseAction(mustMatchTheseTokens, callDuringTry=True) 5450 expr.addParseAction(copyTokenToRepeater, callDuringTry=True) 5451 rep.setName('(prev) ' + _ustr(expr)) 5452 return rep 5453 5454def _escapeRegexRangeChars(s): 5455 # ~ escape these chars: ^-[] 5456 for c in r"\^-[]": 5457 s = s.replace(c, _bslash + c) 5458 s = s.replace("\n", r"\n") 5459 s = s.replace("\t", r"\t") 5460 return _ustr(s) 5461 5462def oneOf(strs, caseless=False, useRegex=True, asKeyword=False): 5463 """Helper to quickly define a set of alternative Literals, and makes 5464 sure to do longest-first testing when there is a conflict, 5465 regardless of the input order, but returns 5466 a :class:`MatchFirst` for best performance. 5467 5468 Parameters: 5469 5470 - strs - a string of space-delimited literals, or a collection of 5471 string literals 5472 - caseless - (default= ``False``) - treat all literals as 5473 caseless 5474 - useRegex - (default= ``True``) - as an optimization, will 5475 generate a Regex object; otherwise, will generate 5476 a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if 5477 creating a :class:`Regex` raises an exception) 5478 - asKeyword - (default=``False``) - enforce Keyword-style matching on the 5479 generated expressions 5480 5481 Example:: 5482 5483 comp_oper = oneOf("< = > <= >= !=") 5484 var = Word(alphas) 5485 number = Word(nums) 5486 term = var | number 5487 comparison_expr = term + comp_oper + term 5488 print(comparison_expr.searchString("B = 12 AA=23 B<=AA AA>12")) 5489 5490 prints:: 5491 5492 [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']] 5493 """ 5494 if isinstance(caseless, basestring): 5495 warnings.warn("More than one string argument passed to oneOf, pass " 5496 "choices as a list or space-delimited string", stacklevel=2) 5497 5498 if caseless: 5499 isequal = (lambda a, b: a.upper() == b.upper()) 5500 masks = (lambda a, b: b.upper().startswith(a.upper())) 5501 parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral 5502 else: 5503 isequal = (lambda a, b: a == b) 5504 masks = (lambda a, b: b.startswith(a)) 5505 parseElementClass = Keyword if asKeyword else Literal 5506 5507 symbols = [] 5508 if isinstance(strs, basestring): 5509 symbols = strs.split() 5510 elif isinstance(strs, Iterable): 5511 symbols = list(strs) 5512 else: 5513 warnings.warn("Invalid argument to oneOf, expected string or iterable", 5514 SyntaxWarning, stacklevel=2) 5515 if not symbols: 5516 return NoMatch() 5517 5518 if not asKeyword: 5519 # if not producing keywords, need to reorder to take care to avoid masking 5520 # longer choices with shorter ones 5521 i = 0 5522 while i < len(symbols) - 1: 5523 cur = symbols[i] 5524 for j, other in enumerate(symbols[i + 1:]): 5525 if isequal(other, cur): 5526 del symbols[i + j + 1] 5527 break 5528 elif masks(cur, other): 5529 del symbols[i + j + 1] 5530 symbols.insert(i, other) 5531 break 5532 else: 5533 i += 1 5534 5535 if not (caseless or asKeyword) and useRegex: 5536 # ~ print (strs, "->", "|".join([_escapeRegexChars(sym) for sym in symbols])) 5537 try: 5538 if len(symbols) == len("".join(symbols)): 5539 return Regex("[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)).setName(' | '.join(symbols)) 5540 else: 5541 return Regex("|".join(re.escape(sym) for sym in symbols)).setName(' | '.join(symbols)) 5542 except Exception: 5543 warnings.warn("Exception creating Regex for oneOf, building MatchFirst", 5544 SyntaxWarning, stacklevel=2) 5545 5546 # last resort, just use MatchFirst 5547 return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols)) 5548 5549def dictOf(key, value): 5550 """Helper to easily and clearly define a dictionary by specifying 5551 the respective patterns for the key and value. Takes care of 5552 defining the :class:`Dict`, :class:`ZeroOrMore`, and 5553 :class:`Group` tokens in the proper order. The key pattern 5554 can include delimiting markers or punctuation, as long as they are 5555 suppressed, thereby leaving the significant key text. The value 5556 pattern can include named results, so that the :class:`Dict` results 5557 can include named token fields. 5558 5559 Example:: 5560 5561 text = "shape: SQUARE posn: upper left color: light blue texture: burlap" 5562 attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)) 5563 print(OneOrMore(attr_expr).parseString(text).dump()) 5564 5565 attr_label = label 5566 attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join) 5567 5568 # similar to Dict, but simpler call format 5569 result = dictOf(attr_label, attr_value).parseString(text) 5570 print(result.dump()) 5571 print(result['shape']) 5572 print(result.shape) # object attribute access works too 5573 print(result.asDict()) 5574 5575 prints:: 5576 5577 [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] 5578 - color: light blue 5579 - posn: upper left 5580 - shape: SQUARE 5581 - texture: burlap 5582 SQUARE 5583 SQUARE 5584 {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'} 5585 """ 5586 return Dict(OneOrMore(Group(key + value))) 5587 5588def originalTextFor(expr, asString=True): 5589 """Helper to return the original, untokenized text for a given 5590 expression. Useful to restore the parsed fields of an HTML start 5591 tag into the raw tag text itself, or to revert separate tokens with 5592 intervening whitespace back to the original matching input text. By 5593 default, returns astring containing the original parsed text. 5594 5595 If the optional ``asString`` argument is passed as 5596 ``False``, then the return value is 5597 a :class:`ParseResults` containing any results names that 5598 were originally matched, and a single token containing the original 5599 matched text from the input string. So if the expression passed to 5600 :class:`originalTextFor` contains expressions with defined 5601 results names, you must set ``asString`` to ``False`` if you 5602 want to preserve those results name values. 5603 5604 Example:: 5605 5606 src = "this is test <b> bold <i>text</i> </b> normal text " 5607 for tag in ("b", "i"): 5608 opener, closer = makeHTMLTags(tag) 5609 patt = originalTextFor(opener + SkipTo(closer) + closer) 5610 print(patt.searchString(src)[0]) 5611 5612 prints:: 5613 5614 ['<b> bold <i>text</i> </b>'] 5615 ['<i>text</i>'] 5616 """ 5617 locMarker = Empty().setParseAction(lambda s, loc, t: loc) 5618 endlocMarker = locMarker.copy() 5619 endlocMarker.callPreparse = False 5620 matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end") 5621 if asString: 5622 extractText = lambda s, l, t: s[t._original_start: t._original_end] 5623 else: 5624 def extractText(s, l, t): 5625 t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]] 5626 matchExpr.setParseAction(extractText) 5627 matchExpr.ignoreExprs = expr.ignoreExprs 5628 return matchExpr 5629 5630def ungroup(expr): 5631 """Helper to undo pyparsing's default grouping of And expressions, 5632 even if all but one are non-empty. 5633 """ 5634 return TokenConverter(expr).addParseAction(lambda t: t[0]) 5635 5636def locatedExpr(expr): 5637 """Helper to decorate a returned token with its starting and ending 5638 locations in the input string. 5639 5640 This helper adds the following results names: 5641 5642 - locn_start = location where matched expression begins 5643 - locn_end = location where matched expression ends 5644 - value = the actual parsed results 5645 5646 Be careful if the input text contains ``<TAB>`` characters, you 5647 may want to call :class:`ParserElement.parseWithTabs` 5648 5649 Example:: 5650 5651 wd = Word(alphas) 5652 for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"): 5653 print(match) 5654 5655 prints:: 5656 5657 [[0, 'ljsdf', 5]] 5658 [[8, 'lksdjjf', 15]] 5659 [[18, 'lkkjj', 23]] 5660 """ 5661 locator = Empty().setParseAction(lambda s, l, t: l) 5662 return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end")) 5663 5664 5665# convenience constants for positional expressions 5666empty = Empty().setName("empty") 5667lineStart = LineStart().setName("lineStart") 5668lineEnd = LineEnd().setName("lineEnd") 5669stringStart = StringStart().setName("stringStart") 5670stringEnd = StringEnd().setName("stringEnd") 5671 5672_escapedPunc = Word(_bslash, r"\[]-*.$+^?()~ ", exact=2).setParseAction(lambda s, l, t: t[0][1]) 5673_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s, l, t: unichr(int(t[0].lstrip(r'\0x'), 16))) 5674_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s, l, t: unichr(int(t[0][1:], 8))) 5675_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1) 5676_charRange = Group(_singleChar + Suppress("-") + _singleChar) 5677_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group(OneOrMore(_charRange | _singleChar)).setResultsName("body") + "]" 5678 5679def srange(s): 5680 r"""Helper to easily define string ranges for use in Word 5681 construction. Borrows syntax from regexp '[]' string range 5682 definitions:: 5683 5684 srange("[0-9]") -> "0123456789" 5685 srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" 5686 srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" 5687 5688 The input string must be enclosed in []'s, and the returned string 5689 is the expanded character set joined into a single string. The 5690 values enclosed in the []'s may be: 5691 5692 - a single character 5693 - an escaped character with a leading backslash (such as ``\-`` 5694 or ``\]``) 5695 - an escaped hex character with a leading ``'\x'`` 5696 (``\x21``, which is a ``'!'`` character) (``\0x##`` 5697 is also supported for backwards compatibility) 5698 - an escaped octal character with a leading ``'\0'`` 5699 (``\041``, which is a ``'!'`` character) 5700 - a range of any of the above, separated by a dash (``'a-z'``, 5701 etc.) 5702 - any combination of the above (``'aeiouy'``, 5703 ``'a-zA-Z0-9_$'``, etc.) 5704 """ 5705 _expanded = lambda p: p if not isinstance(p, ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]), ord(p[1]) + 1)) 5706 try: 5707 return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body) 5708 except Exception: 5709 return "" 5710 5711def matchOnlyAtCol(n): 5712 """Helper method for defining parse actions that require matching at 5713 a specific column in the input text. 5714 """ 5715 def verifyCol(strg, locn, toks): 5716 if col(locn, strg) != n: 5717 raise ParseException(strg, locn, "matched token not at column %d" % n) 5718 return verifyCol 5719 5720def replaceWith(replStr): 5721 """Helper method for common parse actions that simply return 5722 a literal value. Especially useful when used with 5723 :class:`transformString<ParserElement.transformString>` (). 5724 5725 Example:: 5726 5727 num = Word(nums).setParseAction(lambda toks: int(toks[0])) 5728 na = oneOf("N/A NA").setParseAction(replaceWith(math.nan)) 5729 term = na | num 5730 5731 OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234] 5732 """ 5733 return lambda s, l, t: [replStr] 5734 5735def removeQuotes(s, l, t): 5736 """Helper parse action for removing quotation marks from parsed 5737 quoted strings. 5738 5739 Example:: 5740 5741 # by default, quotation marks are included in parsed results 5742 quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"] 5743 5744 # use removeQuotes to strip quotation marks from parsed results 5745 quotedString.setParseAction(removeQuotes) 5746 quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"] 5747 """ 5748 return t[0][1:-1] 5749 5750def tokenMap(func, *args): 5751 """Helper to define a parse action by mapping a function to all 5752 elements of a ParseResults list. If any additional args are passed, 5753 they are forwarded to the given function as additional arguments 5754 after the token, as in 5755 ``hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))``, 5756 which will convert the parsed data to an integer using base 16. 5757 5758 Example (compare the last to example in :class:`ParserElement.transformString`:: 5759 5760 hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16)) 5761 hex_ints.runTests(''' 5762 00 11 22 aa FF 0a 0d 1a 5763 ''') 5764 5765 upperword = Word(alphas).setParseAction(tokenMap(str.upper)) 5766 OneOrMore(upperword).runTests(''' 5767 my kingdom for a horse 5768 ''') 5769 5770 wd = Word(alphas).setParseAction(tokenMap(str.title)) 5771 OneOrMore(wd).setParseAction(' '.join).runTests(''' 5772 now is the winter of our discontent made glorious summer by this sun of york 5773 ''') 5774 5775 prints:: 5776 5777 00 11 22 aa FF 0a 0d 1a 5778 [0, 17, 34, 170, 255, 10, 13, 26] 5779 5780 my kingdom for a horse 5781 ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE'] 5782 5783 now is the winter of our discontent made glorious summer by this sun of york 5784 ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York'] 5785 """ 5786 def pa(s, l, t): 5787 return [func(tokn, *args) for tokn in t] 5788 5789 try: 5790 func_name = getattr(func, '__name__', 5791 getattr(func, '__class__').__name__) 5792 except Exception: 5793 func_name = str(func) 5794 pa.__name__ = func_name 5795 5796 return pa 5797 5798upcaseTokens = tokenMap(lambda t: _ustr(t).upper()) 5799"""(Deprecated) Helper parse action to convert tokens to upper case. 5800Deprecated in favor of :class:`pyparsing_common.upcaseTokens`""" 5801 5802downcaseTokens = tokenMap(lambda t: _ustr(t).lower()) 5803"""(Deprecated) Helper parse action to convert tokens to lower case. 5804Deprecated in favor of :class:`pyparsing_common.downcaseTokens`""" 5805 5806def _makeTags(tagStr, xml, 5807 suppress_LT=Suppress("<"), 5808 suppress_GT=Suppress(">")): 5809 """Internal helper to construct opening and closing tag expressions, given a tag name""" 5810 if isinstance(tagStr, basestring): 5811 resname = tagStr 5812 tagStr = Keyword(tagStr, caseless=not xml) 5813 else: 5814 resname = tagStr.name 5815 5816 tagAttrName = Word(alphas, alphanums + "_-:") 5817 if xml: 5818 tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes) 5819 openTag = (suppress_LT 5820 + tagStr("tag") 5821 + Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue))) 5822 + Optional("/", default=[False])("empty").setParseAction(lambda s, l, t: t[0] == '/') 5823 + suppress_GT) 5824 else: 5825 tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printables, excludeChars=">") 5826 openTag = (suppress_LT 5827 + tagStr("tag") 5828 + Dict(ZeroOrMore(Group(tagAttrName.setParseAction(downcaseTokens) 5829 + Optional(Suppress("=") + tagAttrValue)))) 5830 + Optional("/", default=[False])("empty").setParseAction(lambda s, l, t: t[0] == '/') 5831 + suppress_GT) 5832 closeTag = Combine(_L("</") + tagStr + ">", adjacent=False) 5833 5834 openTag.setName("<%s>" % resname) 5835 # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels 5836 openTag.addParseAction(lambda t: t.__setitem__("start" + "".join(resname.replace(":", " ").title().split()), t.copy())) 5837 closeTag = closeTag("end" + "".join(resname.replace(":", " ").title().split())).setName("</%s>" % resname) 5838 openTag.tag = resname 5839 closeTag.tag = resname 5840 openTag.tag_body = SkipTo(closeTag()) 5841 return openTag, closeTag 5842 5843def makeHTMLTags(tagStr): 5844 """Helper to construct opening and closing tag expressions for HTML, 5845 given a tag name. Matches tags in either upper or lower case, 5846 attributes with namespaces and with quoted or unquoted values. 5847 5848 Example:: 5849 5850 text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>' 5851 # makeHTMLTags returns pyparsing expressions for the opening and 5852 # closing tags as a 2-tuple 5853 a, a_end = makeHTMLTags("A") 5854 link_expr = a + SkipTo(a_end)("link_text") + a_end 5855 5856 for link in link_expr.searchString(text): 5857 # attributes in the <A> tag (like "href" shown here) are 5858 # also accessible as named results 5859 print(link.link_text, '->', link.href) 5860 5861 prints:: 5862 5863 pyparsing -> https://github.com/pyparsing/pyparsing/wiki 5864 """ 5865 return _makeTags(tagStr, False) 5866 5867def makeXMLTags(tagStr): 5868 """Helper to construct opening and closing tag expressions for XML, 5869 given a tag name. Matches tags only in the given upper/lower case. 5870 5871 Example: similar to :class:`makeHTMLTags` 5872 """ 5873 return _makeTags(tagStr, True) 5874 5875def withAttribute(*args, **attrDict): 5876 """Helper to create a validating parse action to be used with start 5877 tags created with :class:`makeXMLTags` or 5878 :class:`makeHTMLTags`. Use ``withAttribute`` to qualify 5879 a starting tag with a required attribute value, to avoid false 5880 matches on common tags such as ``<TD>`` or ``<DIV>``. 5881 5882 Call ``withAttribute`` with a series of attribute names and 5883 values. Specify the list of filter attributes names and values as: 5884 5885 - keyword arguments, as in ``(align="right")``, or 5886 - as an explicit dict with ``**`` operator, when an attribute 5887 name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}`` 5888 - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))`` 5889 5890 For attribute names with a namespace prefix, you must use the second 5891 form. Attribute names are matched insensitive to upper/lower case. 5892 5893 If just testing for ``class`` (with or without a namespace), use 5894 :class:`withClass`. 5895 5896 To verify that the attribute exists, but without specifying a value, 5897 pass ``withAttribute.ANY_VALUE`` as the value. 5898 5899 Example:: 5900 5901 html = ''' 5902 <div> 5903 Some text 5904 <div type="grid">1 4 0 1 0</div> 5905 <div type="graph">1,3 2,3 1,1</div> 5906 <div>this has no type</div> 5907 </div> 5908 5909 ''' 5910 div,div_end = makeHTMLTags("div") 5911 5912 # only match div tag having a type attribute with value "grid" 5913 div_grid = div().setParseAction(withAttribute(type="grid")) 5914 grid_expr = div_grid + SkipTo(div | div_end)("body") 5915 for grid_header in grid_expr.searchString(html): 5916 print(grid_header.body) 5917 5918 # construct a match with any div tag having a type attribute, regardless of the value 5919 div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE)) 5920 div_expr = div_any_type + SkipTo(div | div_end)("body") 5921 for div_header in div_expr.searchString(html): 5922 print(div_header.body) 5923 5924 prints:: 5925 5926 1 4 0 1 0 5927 5928 1 4 0 1 0 5929 1,3 2,3 1,1 5930 """ 5931 if args: 5932 attrs = args[:] 5933 else: 5934 attrs = attrDict.items() 5935 attrs = [(k, v) for k, v in attrs] 5936 def pa(s, l, tokens): 5937 for attrName, attrValue in attrs: 5938 if attrName not in tokens: 5939 raise ParseException(s, l, "no matching attribute " + attrName) 5940 if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: 5941 raise ParseException(s, l, "attribute '%s' has value '%s', must be '%s'" % 5942 (attrName, tokens[attrName], attrValue)) 5943 return pa 5944withAttribute.ANY_VALUE = object() 5945 5946def withClass(classname, namespace=''): 5947 """Simplified version of :class:`withAttribute` when 5948 matching on a div class - made difficult because ``class`` is 5949 a reserved word in Python. 5950 5951 Example:: 5952 5953 html = ''' 5954 <div> 5955 Some text 5956 <div class="grid">1 4 0 1 0</div> 5957 <div class="graph">1,3 2,3 1,1</div> 5958 <div>this <div> has no class</div> 5959 </div> 5960 5961 ''' 5962 div,div_end = makeHTMLTags("div") 5963 div_grid = div().setParseAction(withClass("grid")) 5964 5965 grid_expr = div_grid + SkipTo(div | div_end)("body") 5966 for grid_header in grid_expr.searchString(html): 5967 print(grid_header.body) 5968 5969 div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE)) 5970 div_expr = div_any_type + SkipTo(div | div_end)("body") 5971 for div_header in div_expr.searchString(html): 5972 print(div_header.body) 5973 5974 prints:: 5975 5976 1 4 0 1 0 5977 5978 1 4 0 1 0 5979 1,3 2,3 1,1 5980 """ 5981 classattr = "%s:class" % namespace if namespace else "class" 5982 return withAttribute(**{classattr: classname}) 5983 5984opAssoc = SimpleNamespace() 5985opAssoc.LEFT = object() 5986opAssoc.RIGHT = object() 5987 5988def infixNotation(baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')')): 5989 """Helper method for constructing grammars of expressions made up of 5990 operators working in a precedence hierarchy. Operators may be unary 5991 or binary, left- or right-associative. Parse actions can also be 5992 attached to operator expressions. The generated parser will also 5993 recognize the use of parentheses to override operator precedences 5994 (see example below). 5995 5996 Note: if you define a deep operator list, you may see performance 5997 issues when using infixNotation. See 5998 :class:`ParserElement.enablePackrat` for a mechanism to potentially 5999 improve your parser performance. 6000 6001 Parameters: 6002 - baseExpr - expression representing the most basic element for the 6003 nested 6004 - opList - list of tuples, one for each operator precedence level 6005 in the expression grammar; each tuple is of the form ``(opExpr, 6006 numTerms, rightLeftAssoc, parseAction)``, where: 6007 6008 - opExpr is the pyparsing expression for the operator; may also 6009 be a string, which will be converted to a Literal; if numTerms 6010 is 3, opExpr is a tuple of two expressions, for the two 6011 operators separating the 3 terms 6012 - numTerms is the number of terms for this operator (must be 1, 6013 2, or 3) 6014 - rightLeftAssoc is the indicator whether the operator is right 6015 or left associative, using the pyparsing-defined constants 6016 ``opAssoc.RIGHT`` and ``opAssoc.LEFT``. 6017 - parseAction is the parse action to be associated with 6018 expressions matching this operator expression (the parse action 6019 tuple member may be omitted); if the parse action is passed 6020 a tuple or list of functions, this is equivalent to calling 6021 ``setParseAction(*fn)`` 6022 (:class:`ParserElement.setParseAction`) 6023 - lpar - expression for matching left-parentheses 6024 (default= ``Suppress('(')``) 6025 - rpar - expression for matching right-parentheses 6026 (default= ``Suppress(')')``) 6027 6028 Example:: 6029 6030 # simple example of four-function arithmetic with ints and 6031 # variable names 6032 integer = pyparsing_common.signed_integer 6033 varname = pyparsing_common.identifier 6034 6035 arith_expr = infixNotation(integer | varname, 6036 [ 6037 ('-', 1, opAssoc.RIGHT), 6038 (oneOf('* /'), 2, opAssoc.LEFT), 6039 (oneOf('+ -'), 2, opAssoc.LEFT), 6040 ]) 6041 6042 arith_expr.runTests(''' 6043 5+3*6 6044 (5+3)*6 6045 -2--11 6046 ''', fullDump=False) 6047 6048 prints:: 6049 6050 5+3*6 6051 [[5, '+', [3, '*', 6]]] 6052 6053 (5+3)*6 6054 [[[5, '+', 3], '*', 6]] 6055 6056 -2--11 6057 [[['-', 2], '-', ['-', 11]]] 6058 """ 6059 # captive version of FollowedBy that does not do parse actions or capture results names 6060 class _FB(FollowedBy): 6061 def parseImpl(self, instring, loc, doActions=True): 6062 self.expr.tryParse(instring, loc) 6063 return loc, [] 6064 6065 ret = Forward() 6066 lastExpr = baseExpr | (lpar + ret + rpar) 6067 for i, operDef in enumerate(opList): 6068 opExpr, arity, rightLeftAssoc, pa = (operDef + (None, ))[:4] 6069 termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr 6070 if arity == 3: 6071 if opExpr is None or len(opExpr) != 2: 6072 raise ValueError( 6073 "if numterms=3, opExpr must be a tuple or list of two expressions") 6074 opExpr1, opExpr2 = opExpr 6075 thisExpr = Forward().setName(termName) 6076 if rightLeftAssoc == opAssoc.LEFT: 6077 if arity == 1: 6078 matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + OneOrMore(opExpr)) 6079 elif arity == 2: 6080 if opExpr is not None: 6081 matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(lastExpr + OneOrMore(opExpr + lastExpr)) 6082 else: 6083 matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr + OneOrMore(lastExpr)) 6084 elif arity == 3: 6085 matchExpr = (_FB(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) 6086 + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr))) 6087 else: 6088 raise ValueError("operator must be unary (1), binary (2), or ternary (3)") 6089 elif rightLeftAssoc == opAssoc.RIGHT: 6090 if arity == 1: 6091 # try to avoid LR with this extra test 6092 if not isinstance(opExpr, Optional): 6093 opExpr = Optional(opExpr) 6094 matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr) 6095 elif arity == 2: 6096 if opExpr is not None: 6097 matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(lastExpr + OneOrMore(opExpr + thisExpr)) 6098 else: 6099 matchExpr = _FB(lastExpr + thisExpr) + Group(lastExpr + OneOrMore(thisExpr)) 6100 elif arity == 3: 6101 matchExpr = (_FB(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) 6102 + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)) 6103 else: 6104 raise ValueError("operator must be unary (1), binary (2), or ternary (3)") 6105 else: 6106 raise ValueError("operator must indicate right or left associativity") 6107 if pa: 6108 if isinstance(pa, (tuple, list)): 6109 matchExpr.setParseAction(*pa) 6110 else: 6111 matchExpr.setParseAction(pa) 6112 thisExpr <<= (matchExpr.setName(termName) | lastExpr) 6113 lastExpr = thisExpr 6114 ret <<= lastExpr 6115 return ret 6116 6117operatorPrecedence = infixNotation 6118"""(Deprecated) Former name of :class:`infixNotation`, will be 6119dropped in a future release.""" 6120 6121dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').setName("string enclosed in double quotes") 6122sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").setName("string enclosed in single quotes") 6123quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' 6124 | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").setName("quotedString using single or double quotes") 6125unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal") 6126 6127def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()): 6128 """Helper method for defining nested lists enclosed in opening and 6129 closing delimiters ("(" and ")" are the default). 6130 6131 Parameters: 6132 - opener - opening character for a nested list 6133 (default= ``"("``); can also be a pyparsing expression 6134 - closer - closing character for a nested list 6135 (default= ``")"``); can also be a pyparsing expression 6136 - content - expression for items within the nested lists 6137 (default= ``None``) 6138 - ignoreExpr - expression for ignoring opening and closing 6139 delimiters (default= :class:`quotedString`) 6140 6141 If an expression is not provided for the content argument, the 6142 nested expression will capture all whitespace-delimited content 6143 between delimiters as a list of separate values. 6144 6145 Use the ``ignoreExpr`` argument to define expressions that may 6146 contain opening or closing characters that should not be treated as 6147 opening or closing characters for nesting, such as quotedString or 6148 a comment expression. Specify multiple expressions using an 6149 :class:`Or` or :class:`MatchFirst`. The default is 6150 :class:`quotedString`, but if no expressions are to be ignored, then 6151 pass ``None`` for this argument. 6152 6153 Example:: 6154 6155 data_type = oneOf("void int short long char float double") 6156 decl_data_type = Combine(data_type + Optional(Word('*'))) 6157 ident = Word(alphas+'_', alphanums+'_') 6158 number = pyparsing_common.number 6159 arg = Group(decl_data_type + ident) 6160 LPAR, RPAR = map(Suppress, "()") 6161 6162 code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment)) 6163 6164 c_function = (decl_data_type("type") 6165 + ident("name") 6166 + LPAR + Optional(delimitedList(arg), [])("args") + RPAR 6167 + code_body("body")) 6168 c_function.ignore(cStyleComment) 6169 6170 source_code = ''' 6171 int is_odd(int x) { 6172 return (x%2); 6173 } 6174 6175 int dec_to_hex(char hchar) { 6176 if (hchar >= '0' && hchar <= '9') { 6177 return (ord(hchar)-ord('0')); 6178 } else { 6179 return (10+ord(hchar)-ord('A')); 6180 } 6181 } 6182 ''' 6183 for func in c_function.searchString(source_code): 6184 print("%(name)s (%(type)s) args: %(args)s" % func) 6185 6186 6187 prints:: 6188 6189 is_odd (int) args: [['int', 'x']] 6190 dec_to_hex (int) args: [['char', 'hchar']] 6191 """ 6192 if opener == closer: 6193 raise ValueError("opening and closing strings cannot be the same") 6194 if content is None: 6195 if isinstance(opener, basestring) and isinstance(closer, basestring): 6196 if len(opener) == 1 and len(closer) == 1: 6197 if ignoreExpr is not None: 6198 content = (Combine(OneOrMore(~ignoreExpr 6199 + CharsNotIn(opener 6200 + closer 6201 + ParserElement.DEFAULT_WHITE_CHARS, exact=1) 6202 ) 6203 ).setParseAction(lambda t: t[0].strip())) 6204 else: 6205 content = (empty.copy() + CharsNotIn(opener 6206 + closer 6207 + ParserElement.DEFAULT_WHITE_CHARS 6208 ).setParseAction(lambda t: t[0].strip())) 6209 else: 6210 if ignoreExpr is not None: 6211 content = (Combine(OneOrMore(~ignoreExpr 6212 + ~Literal(opener) 6213 + ~Literal(closer) 6214 + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)) 6215 ).setParseAction(lambda t: t[0].strip())) 6216 else: 6217 content = (Combine(OneOrMore(~Literal(opener) 6218 + ~Literal(closer) 6219 + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)) 6220 ).setParseAction(lambda t: t[0].strip())) 6221 else: 6222 raise ValueError("opening and closing arguments must be strings if no content expression is given") 6223 ret = Forward() 6224 if ignoreExpr is not None: 6225 ret <<= Group(Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer)) 6226 else: 6227 ret <<= Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer)) 6228 ret.setName('nested %s%s expression' % (opener, closer)) 6229 return ret 6230 6231def indentedBlock(blockStatementExpr, indentStack, indent=True): 6232 """Helper method for defining space-delimited indentation blocks, 6233 such as those used to define block statements in Python source code. 6234 6235 Parameters: 6236 6237 - blockStatementExpr - expression defining syntax of statement that 6238 is repeated within the indented block 6239 - indentStack - list created by caller to manage indentation stack 6240 (multiple statementWithIndentedBlock expressions within a single 6241 grammar should share a common indentStack) 6242 - indent - boolean indicating whether block must be indented beyond 6243 the current level; set to False for block of left-most 6244 statements (default= ``True``) 6245 6246 A valid block must contain at least one ``blockStatement``. 6247 6248 Example:: 6249 6250 data = ''' 6251 def A(z): 6252 A1 6253 B = 100 6254 G = A2 6255 A2 6256 A3 6257 B 6258 def BB(a,b,c): 6259 BB1 6260 def BBA(): 6261 bba1 6262 bba2 6263 bba3 6264 C 6265 D 6266 def spam(x,y): 6267 def eggs(z): 6268 pass 6269 ''' 6270 6271 6272 indentStack = [1] 6273 stmt = Forward() 6274 6275 identifier = Word(alphas, alphanums) 6276 funcDecl = ("def" + identifier + Group("(" + Optional(delimitedList(identifier)) + ")") + ":") 6277 func_body = indentedBlock(stmt, indentStack) 6278 funcDef = Group(funcDecl + func_body) 6279 6280 rvalue = Forward() 6281 funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")") 6282 rvalue << (funcCall | identifier | Word(nums)) 6283 assignment = Group(identifier + "=" + rvalue) 6284 stmt << (funcDef | assignment | identifier) 6285 6286 module_body = OneOrMore(stmt) 6287 6288 parseTree = module_body.parseString(data) 6289 parseTree.pprint() 6290 6291 prints:: 6292 6293 [['def', 6294 'A', 6295 ['(', 'z', ')'], 6296 ':', 6297 [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]], 6298 'B', 6299 ['def', 6300 'BB', 6301 ['(', 'a', 'b', 'c', ')'], 6302 ':', 6303 [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]], 6304 'C', 6305 'D', 6306 ['def', 6307 'spam', 6308 ['(', 'x', 'y', ')'], 6309 ':', 6310 [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]] 6311 """ 6312 backup_stack = indentStack[:] 6313 6314 def reset_stack(): 6315 indentStack[:] = backup_stack 6316 6317 def checkPeerIndent(s, l, t): 6318 if l >= len(s): return 6319 curCol = col(l, s) 6320 if curCol != indentStack[-1]: 6321 if curCol > indentStack[-1]: 6322 raise ParseException(s, l, "illegal nesting") 6323 raise ParseException(s, l, "not a peer entry") 6324 6325 def checkSubIndent(s, l, t): 6326 curCol = col(l, s) 6327 if curCol > indentStack[-1]: 6328 indentStack.append(curCol) 6329 else: 6330 raise ParseException(s, l, "not a subentry") 6331 6332 def checkUnindent(s, l, t): 6333 if l >= len(s): return 6334 curCol = col(l, s) 6335 if not(indentStack and curCol in indentStack): 6336 raise ParseException(s, l, "not an unindent") 6337 if curCol < indentStack[-1]: 6338 indentStack.pop() 6339 6340 NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress(), stopOn=StringEnd()) 6341 INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT') 6342 PEER = Empty().setParseAction(checkPeerIndent).setName('') 6343 UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT') 6344 if indent: 6345 smExpr = Group(Optional(NL) 6346 + INDENT 6347 + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL), stopOn=StringEnd()) 6348 + UNDENT) 6349 else: 6350 smExpr = Group(Optional(NL) 6351 + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL), stopOn=StringEnd()) 6352 + UNDENT) 6353 smExpr.setFailAction(lambda a, b, c, d: reset_stack()) 6354 blockStatementExpr.ignore(_bslash + LineEnd()) 6355 return smExpr.setName('indented block') 6356 6357alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") 6358punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") 6359 6360anyOpenTag, anyCloseTag = makeHTMLTags(Word(alphas, alphanums + "_:").setName('any tag')) 6361_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(), '><& "\'')) 6362commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity") 6363def replaceHTMLEntity(t): 6364 """Helper parser action to replace common HTML entities with their special characters""" 6365 return _htmlEntityMap.get(t.entity) 6366 6367# it's easy to get these comment structures wrong - they're very common, so may as well make them available 6368cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment") 6369"Comment of the form ``/* ... */``" 6370 6371htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment") 6372"Comment of the form ``<!-- ... -->``" 6373 6374restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line") 6375dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment") 6376"Comment of the form ``// ... (to end of line)``" 6377 6378cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/' | dblSlashComment).setName("C++ style comment") 6379"Comment of either form :class:`cStyleComment` or :class:`dblSlashComment`" 6380 6381javaStyleComment = cppStyleComment 6382"Same as :class:`cppStyleComment`" 6383 6384pythonStyleComment = Regex(r"#.*").setName("Python style comment") 6385"Comment of the form ``# ... (to end of line)``" 6386 6387_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') 6388 + Optional(Word(" \t") 6389 + ~Literal(",") + ~LineEnd()))).streamline().setName("commaItem") 6390commaSeparatedList = delimitedList(Optional(quotedString.copy() | _commasepitem, default="")).setName("commaSeparatedList") 6391"""(Deprecated) Predefined expression of 1 or more printable words or 6392quoted strings, separated by commas. 6393 6394This expression is deprecated in favor of :class:`pyparsing_common.comma_separated_list`. 6395""" 6396 6397# some other useful expressions - using lower-case class name since we are really using this as a namespace 6398class pyparsing_common: 6399 """Here are some common low-level expressions that may be useful in 6400 jump-starting parser development: 6401 6402 - numeric forms (:class:`integers<integer>`, :class:`reals<real>`, 6403 :class:`scientific notation<sci_real>`) 6404 - common :class:`programming identifiers<identifier>` 6405 - network addresses (:class:`MAC<mac_address>`, 6406 :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`) 6407 - ISO8601 :class:`dates<iso8601_date>` and 6408 :class:`datetime<iso8601_datetime>` 6409 - :class:`UUID<uuid>` 6410 - :class:`comma-separated list<comma_separated_list>` 6411 6412 Parse actions: 6413 6414 - :class:`convertToInteger` 6415 - :class:`convertToFloat` 6416 - :class:`convertToDate` 6417 - :class:`convertToDatetime` 6418 - :class:`stripHTMLTags` 6419 - :class:`upcaseTokens` 6420 - :class:`downcaseTokens` 6421 6422 Example:: 6423 6424 pyparsing_common.number.runTests(''' 6425 # any int or real number, returned as the appropriate type 6426 100 6427 -100 6428 +100 6429 3.14159 6430 6.02e23 6431 1e-12 6432 ''') 6433 6434 pyparsing_common.fnumber.runTests(''' 6435 # any int or real number, returned as float 6436 100 6437 -100 6438 +100 6439 3.14159 6440 6.02e23 6441 1e-12 6442 ''') 6443 6444 pyparsing_common.hex_integer.runTests(''' 6445 # hex numbers 6446 100 6447 FF 6448 ''') 6449 6450 pyparsing_common.fraction.runTests(''' 6451 # fractions 6452 1/2 6453 -3/4 6454 ''') 6455 6456 pyparsing_common.mixed_integer.runTests(''' 6457 # mixed fractions 6458 1 6459 1/2 6460 -3/4 6461 1-3/4 6462 ''') 6463 6464 import uuid 6465 pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) 6466 pyparsing_common.uuid.runTests(''' 6467 # uuid 6468 12345678-1234-5678-1234-567812345678 6469 ''') 6470 6471 prints:: 6472 6473 # any int or real number, returned as the appropriate type 6474 100 6475 [100] 6476 6477 -100 6478 [-100] 6479 6480 +100 6481 [100] 6482 6483 3.14159 6484 [3.14159] 6485 6486 6.02e23 6487 [6.02e+23] 6488 6489 1e-12 6490 [1e-12] 6491 6492 # any int or real number, returned as float 6493 100 6494 [100.0] 6495 6496 -100 6497 [-100.0] 6498 6499 +100 6500 [100.0] 6501 6502 3.14159 6503 [3.14159] 6504 6505 6.02e23 6506 [6.02e+23] 6507 6508 1e-12 6509 [1e-12] 6510 6511 # hex numbers 6512 100 6513 [256] 6514 6515 FF 6516 [255] 6517 6518 # fractions 6519 1/2 6520 [0.5] 6521 6522 -3/4 6523 [-0.75] 6524 6525 # mixed fractions 6526 1 6527 [1] 6528 6529 1/2 6530 [0.5] 6531 6532 -3/4 6533 [-0.75] 6534 6535 1-3/4 6536 [1.75] 6537 6538 # uuid 6539 12345678-1234-5678-1234-567812345678 6540 [UUID('12345678-1234-5678-1234-567812345678')] 6541 """ 6542 6543 convertToInteger = tokenMap(int) 6544 """ 6545 Parse action for converting parsed integers to Python int 6546 """ 6547 6548 convertToFloat = tokenMap(float) 6549 """ 6550 Parse action for converting parsed numbers to Python float 6551 """ 6552 6553 integer = Word(nums).setName("integer").setParseAction(convertToInteger) 6554 """expression that parses an unsigned integer, returns an int""" 6555 6556 hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int, 16)) 6557 """expression that parses a hexadecimal integer, returns an int""" 6558 6559 signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger) 6560 """expression that parses an integer with optional leading sign, returns an int""" 6561 6562 fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction") 6563 """fractional expression of an integer divided by an integer, returns a float""" 6564 fraction.addParseAction(lambda t: t[0]/t[-1]) 6565 6566 mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction") 6567 """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" 6568 mixed_integer.addParseAction(sum) 6569 6570 real = Regex(r'[+-]?(?:\d+\.\d*|\.\d+)').setName("real number").setParseAction(convertToFloat) 6571 """expression that parses a floating point number and returns a float""" 6572 6573 sci_real = Regex(r'[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat) 6574 """expression that parses a floating point number with optional 6575 scientific notation and returns a float""" 6576 6577 # streamlining this expression makes the docs nicer-looking 6578 number = (sci_real | real | signed_integer).streamline() 6579 """any numeric expression, returns the corresponding Python type""" 6580 6581 fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat) 6582 """any int or real number, returned as float""" 6583 6584 identifier = Word(alphas + '_', alphanums + '_').setName("identifier") 6585 """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" 6586 6587 ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address") 6588 "IPv4 address (``0.0.0.0 - 255.255.255.255``)" 6589 6590 _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer") 6591 _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part) * 7).setName("full IPv6 address") 6592 _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6)) 6593 + "::" 6594 + Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6)) 6595 ).setName("short IPv6 address") 6596 _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8) 6597 _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address") 6598 ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address") 6599 "IPv6 address (long, short, or mixed form)" 6600 6601 mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address") 6602 "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" 6603 6604 @staticmethod 6605 def convertToDate(fmt="%Y-%m-%d"): 6606 """ 6607 Helper to create a parse action for converting parsed date string to Python datetime.date 6608 6609 Params - 6610 - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``) 6611 6612 Example:: 6613 6614 date_expr = pyparsing_common.iso8601_date.copy() 6615 date_expr.setParseAction(pyparsing_common.convertToDate()) 6616 print(date_expr.parseString("1999-12-31")) 6617 6618 prints:: 6619 6620 [datetime.date(1999, 12, 31)] 6621 """ 6622 def cvt_fn(s, l, t): 6623 try: 6624 return datetime.strptime(t[0], fmt).date() 6625 except ValueError as ve: 6626 raise ParseException(s, l, str(ve)) 6627 return cvt_fn 6628 6629 @staticmethod 6630 def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"): 6631 """Helper to create a parse action for converting parsed 6632 datetime string to Python datetime.datetime 6633 6634 Params - 6635 - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``) 6636 6637 Example:: 6638 6639 dt_expr = pyparsing_common.iso8601_datetime.copy() 6640 dt_expr.setParseAction(pyparsing_common.convertToDatetime()) 6641 print(dt_expr.parseString("1999-12-31T23:59:59.999")) 6642 6643 prints:: 6644 6645 [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)] 6646 """ 6647 def cvt_fn(s, l, t): 6648 try: 6649 return datetime.strptime(t[0], fmt) 6650 except ValueError as ve: 6651 raise ParseException(s, l, str(ve)) 6652 return cvt_fn 6653 6654 iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date") 6655 "ISO8601 date (``yyyy-mm-dd``)" 6656 6657 iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime") 6658 "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``" 6659 6660 uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID") 6661 "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)" 6662 6663 _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress() 6664 @staticmethod 6665 def stripHTMLTags(s, l, tokens): 6666 """Parse action to remove HTML tags from web page HTML source 6667 6668 Example:: 6669 6670 # strip HTML links from normal text 6671 text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>' 6672 td, td_end = makeHTMLTags("TD") 6673 table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end 6674 print(table_text.parseString(text).body) 6675 6676 Prints:: 6677 6678 More info at the pyparsing wiki page 6679 """ 6680 return pyparsing_common._html_stripper.transformString(tokens[0]) 6681 6682 _commasepitem = Combine(OneOrMore(~Literal(",") 6683 + ~LineEnd() 6684 + Word(printables, excludeChars=',') 6685 + Optional(White(" \t")))).streamline().setName("commaItem") 6686 comma_separated_list = delimitedList(Optional(quotedString.copy() 6687 | _commasepitem, default='') 6688 ).setName("comma separated list") 6689 """Predefined expression of 1 or more printable words or quoted strings, separated by commas.""" 6690 6691 upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper())) 6692 """Parse action to convert tokens to upper case.""" 6693 6694 downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower())) 6695 """Parse action to convert tokens to lower case.""" 6696 6697 6698class _lazyclassproperty(object): 6699 def __init__(self, fn): 6700 self.fn = fn 6701 self.__doc__ = fn.__doc__ 6702 self.__name__ = fn.__name__ 6703 6704 def __get__(self, obj, cls): 6705 if cls is None: 6706 cls = type(obj) 6707 if not hasattr(cls, '_intern') or any(cls._intern is getattr(superclass, '_intern', []) 6708 for superclass in cls.__mro__[1:]): 6709 cls._intern = {} 6710 attrname = self.fn.__name__ 6711 if attrname not in cls._intern: 6712 cls._intern[attrname] = self.fn(cls) 6713 return cls._intern[attrname] 6714 6715 6716class unicode_set(object): 6717 """ 6718 A set of Unicode characters, for language-specific strings for 6719 ``alphas``, ``nums``, ``alphanums``, and ``printables``. 6720 A unicode_set is defined by a list of ranges in the Unicode character 6721 set, in a class attribute ``_ranges``, such as:: 6722 6723 _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),] 6724 6725 A unicode set can also be defined using multiple inheritance of other unicode sets:: 6726 6727 class CJK(Chinese, Japanese, Korean): 6728 pass 6729 """ 6730 _ranges = [] 6731 6732 @classmethod 6733 def _get_chars_for_ranges(cls): 6734 ret = [] 6735 for cc in cls.__mro__: 6736 if cc is unicode_set: 6737 break 6738 for rr in cc._ranges: 6739 ret.extend(range(rr[0], rr[-1] + 1)) 6740 return [unichr(c) for c in sorted(set(ret))] 6741 6742 @_lazyclassproperty 6743 def printables(cls): 6744 "all non-whitespace characters in this range" 6745 return u''.join(filterfalse(unicode.isspace, cls._get_chars_for_ranges())) 6746 6747 @_lazyclassproperty 6748 def alphas(cls): 6749 "all alphabetic characters in this range" 6750 return u''.join(filter(unicode.isalpha, cls._get_chars_for_ranges())) 6751 6752 @_lazyclassproperty 6753 def nums(cls): 6754 "all numeric digit characters in this range" 6755 return u''.join(filter(unicode.isdigit, cls._get_chars_for_ranges())) 6756 6757 @_lazyclassproperty 6758 def alphanums(cls): 6759 "all alphanumeric characters in this range" 6760 return cls.alphas + cls.nums 6761 6762 6763class pyparsing_unicode(unicode_set): 6764 """ 6765 A namespace class for defining common language unicode_sets. 6766 """ 6767 _ranges = [(32, sys.maxunicode)] 6768 6769 class Latin1(unicode_set): 6770 "Unicode set for Latin-1 Unicode Character Range" 6771 _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),] 6772 6773 class LatinA(unicode_set): 6774 "Unicode set for Latin-A Unicode Character Range" 6775 _ranges = [(0x0100, 0x017f),] 6776 6777 class LatinB(unicode_set): 6778 "Unicode set for Latin-B Unicode Character Range" 6779 _ranges = [(0x0180, 0x024f),] 6780 6781 class Greek(unicode_set): 6782 "Unicode set for Greek Unicode Character Ranges" 6783 _ranges = [ 6784 (0x0370, 0x03ff), (0x1f00, 0x1f15), (0x1f18, 0x1f1d), (0x1f20, 0x1f45), (0x1f48, 0x1f4d), 6785 (0x1f50, 0x1f57), (0x1f59,), (0x1f5b,), (0x1f5d,), (0x1f5f, 0x1f7d), (0x1f80, 0x1fb4), (0x1fb6, 0x1fc4), 6786 (0x1fc6, 0x1fd3), (0x1fd6, 0x1fdb), (0x1fdd, 0x1fef), (0x1ff2, 0x1ff4), (0x1ff6, 0x1ffe), 6787 ] 6788 6789 class Cyrillic(unicode_set): 6790 "Unicode set for Cyrillic Unicode Character Range" 6791 _ranges = [(0x0400, 0x04ff)] 6792 6793 class Chinese(unicode_set): 6794 "Unicode set for Chinese Unicode Character Range" 6795 _ranges = [(0x4e00, 0x9fff), (0x3000, 0x303f),] 6796 6797 class Japanese(unicode_set): 6798 "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges" 6799 _ranges = [] 6800 6801 class Kanji(unicode_set): 6802 "Unicode set for Kanji Unicode Character Range" 6803 _ranges = [(0x4E00, 0x9Fbf), (0x3000, 0x303f),] 6804 6805 class Hiragana(unicode_set): 6806 "Unicode set for Hiragana Unicode Character Range" 6807 _ranges = [(0x3040, 0x309f),] 6808 6809 class Katakana(unicode_set): 6810 "Unicode set for Katakana Unicode Character Range" 6811 _ranges = [(0x30a0, 0x30ff),] 6812 6813 class Korean(unicode_set): 6814 "Unicode set for Korean Unicode Character Range" 6815 _ranges = [(0xac00, 0xd7af), (0x1100, 0x11ff), (0x3130, 0x318f), (0xa960, 0xa97f), (0xd7b0, 0xd7ff), (0x3000, 0x303f),] 6816 6817 class CJK(Chinese, Japanese, Korean): 6818 "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range" 6819 pass 6820 6821 class Thai(unicode_set): 6822 "Unicode set for Thai Unicode Character Range" 6823 _ranges = [(0x0e01, 0x0e3a), (0x0e3f, 0x0e5b),] 6824 6825 class Arabic(unicode_set): 6826 "Unicode set for Arabic Unicode Character Range" 6827 _ranges = [(0x0600, 0x061b), (0x061e, 0x06ff), (0x0700, 0x077f),] 6828 6829 class Hebrew(unicode_set): 6830 "Unicode set for Hebrew Unicode Character Range" 6831 _ranges = [(0x0590, 0x05ff),] 6832 6833 class Devanagari(unicode_set): 6834 "Unicode set for Devanagari Unicode Character Range" 6835 _ranges = [(0x0900, 0x097f), (0xa8e0, 0xa8ff)] 6836 6837pyparsing_unicode.Japanese._ranges = (pyparsing_unicode.Japanese.Kanji._ranges 6838 + pyparsing_unicode.Japanese.Hiragana._ranges 6839 + pyparsing_unicode.Japanese.Katakana._ranges) 6840 6841# define ranges in language character sets 6842if PY_3: 6843 setattr(pyparsing_unicode, u"العربية", pyparsing_unicode.Arabic) 6844 setattr(pyparsing_unicode, u"中文", pyparsing_unicode.Chinese) 6845 setattr(pyparsing_unicode, u"кириллица", pyparsing_unicode.Cyrillic) 6846 setattr(pyparsing_unicode, u"Ελληνικά", pyparsing_unicode.Greek) 6847 setattr(pyparsing_unicode, u"עִברִית", pyparsing_unicode.Hebrew) 6848 setattr(pyparsing_unicode, u"日本語", pyparsing_unicode.Japanese) 6849 setattr(pyparsing_unicode.Japanese, u"漢字", pyparsing_unicode.Japanese.Kanji) 6850 setattr(pyparsing_unicode.Japanese, u"カタカナ", pyparsing_unicode.Japanese.Katakana) 6851 setattr(pyparsing_unicode.Japanese, u"ひらがな", pyparsing_unicode.Japanese.Hiragana) 6852 setattr(pyparsing_unicode, u"한국어", pyparsing_unicode.Korean) 6853 setattr(pyparsing_unicode, u"ไทย", pyparsing_unicode.Thai) 6854 setattr(pyparsing_unicode, u"देवनागरी", pyparsing_unicode.Devanagari) 6855 6856 6857class pyparsing_test: 6858 """ 6859 namespace class for classes useful in writing unit tests 6860 """ 6861 6862 class reset_pyparsing_context: 6863 """ 6864 Context manager to be used when writing unit tests that modify pyparsing config values: 6865 - packrat parsing 6866 - default whitespace characters. 6867 - default keyword characters 6868 - literal string auto-conversion class 6869 - __diag__ settings 6870 6871 Example: 6872 with reset_pyparsing_context(): 6873 # test that literals used to construct a grammar are automatically suppressed 6874 ParserElement.inlineLiteralsUsing(Suppress) 6875 6876 term = Word(alphas) | Word(nums) 6877 group = Group('(' + term[...] + ')') 6878 6879 # assert that the '()' characters are not included in the parsed tokens 6880 self.assertParseAndCheckLisst(group, "(abc 123 def)", ['abc', '123', 'def']) 6881 6882 # after exiting context manager, literals are converted to Literal expressions again 6883 """ 6884 6885 def __init__(self): 6886 self._save_context = {} 6887 6888 def save(self): 6889 self._save_context["default_whitespace"] = ParserElement.DEFAULT_WHITE_CHARS 6890 self._save_context["default_keyword_chars"] = Keyword.DEFAULT_KEYWORD_CHARS 6891 self._save_context[ 6892 "literal_string_class" 6893 ] = ParserElement._literalStringClass 6894 self._save_context["packrat_enabled"] = ParserElement._packratEnabled 6895 self._save_context["packrat_parse"] = ParserElement._parse 6896 self._save_context["__diag__"] = { 6897 name: getattr(__diag__, name) for name in __diag__._all_names 6898 } 6899 self._save_context["__compat__"] = { 6900 "collect_all_And_tokens": __compat__.collect_all_And_tokens 6901 } 6902 return self 6903 6904 def restore(self): 6905 # reset pyparsing global state 6906 if ( 6907 ParserElement.DEFAULT_WHITE_CHARS 6908 != self._save_context["default_whitespace"] 6909 ): 6910 ParserElement.setDefaultWhitespaceChars( 6911 self._save_context["default_whitespace"] 6912 ) 6913 Keyword.DEFAULT_KEYWORD_CHARS = self._save_context["default_keyword_chars"] 6914 ParserElement.inlineLiteralsUsing( 6915 self._save_context["literal_string_class"] 6916 ) 6917 for name, value in self._save_context["__diag__"].items(): 6918 setattr(__diag__, name, value) 6919 ParserElement._packratEnabled = self._save_context["packrat_enabled"] 6920 ParserElement._parse = self._save_context["packrat_parse"] 6921 __compat__.collect_all_And_tokens = self._save_context["__compat__"] 6922 6923 def __enter__(self): 6924 return self.save() 6925 6926 def __exit__(self, *args): 6927 return self.restore() 6928 6929 class TestParseResultsAsserts: 6930 """ 6931 A mixin class to add parse results assertion methods to normal unittest.TestCase classes. 6932 """ 6933 def assertParseResultsEquals( 6934 self, result, expected_list=None, expected_dict=None, msg=None 6935 ): 6936 """ 6937 Unit test assertion to compare a ParseResults object with an optional expected_list, 6938 and compare any defined results names with an optional expected_dict. 6939 """ 6940 if expected_list is not None: 6941 self.assertEqual(expected_list, result.asList(), msg=msg) 6942 if expected_dict is not None: 6943 self.assertEqual(expected_dict, result.asDict(), msg=msg) 6944 6945 def assertParseAndCheckList( 6946 self, expr, test_string, expected_list, msg=None, verbose=True 6947 ): 6948 """ 6949 Convenience wrapper assert to test a parser element and input string, and assert that 6950 the resulting ParseResults.asList() is equal to the expected_list. 6951 """ 6952 result = expr.parseString(test_string, parseAll=True) 6953 if verbose: 6954 print(result.dump()) 6955 self.assertParseResultsEquals(result, expected_list=expected_list, msg=msg) 6956 6957 def assertParseAndCheckDict( 6958 self, expr, test_string, expected_dict, msg=None, verbose=True 6959 ): 6960 """ 6961 Convenience wrapper assert to test a parser element and input string, and assert that 6962 the resulting ParseResults.asDict() is equal to the expected_dict. 6963 """ 6964 result = expr.parseString(test_string, parseAll=True) 6965 if verbose: 6966 print(result.dump()) 6967 self.assertParseResultsEquals(result, expected_dict=expected_dict, msg=msg) 6968 6969 def assertRunTestResults( 6970 self, run_tests_report, expected_parse_results=None, msg=None 6971 ): 6972 """ 6973 Unit test assertion to evaluate output of ParserElement.runTests(). If a list of 6974 list-dict tuples is given as the expected_parse_results argument, then these are zipped 6975 with the report tuples returned by runTests and evaluated using assertParseResultsEquals. 6976 Finally, asserts that the overall runTests() success value is True. 6977 6978 :param run_tests_report: tuple(bool, [tuple(str, ParseResults or Exception)]) returned from runTests 6979 :param expected_parse_results (optional): [tuple(str, list, dict, Exception)] 6980 """ 6981 run_test_success, run_test_results = run_tests_report 6982 6983 if expected_parse_results is not None: 6984 merged = [ 6985 (rpt[0], rpt[1], expected) 6986 for rpt, expected in zip(run_test_results, expected_parse_results) 6987 ] 6988 for test_string, result, expected in merged: 6989 # expected should be a tuple containing a list and/or a dict or an exception, 6990 # and optional failure message string 6991 # an empty tuple will skip any result validation 6992 fail_msg = next( 6993 (exp for exp in expected if isinstance(exp, str)), None 6994 ) 6995 expected_exception = next( 6996 ( 6997 exp 6998 for exp in expected 6999 if isinstance(exp, type) and issubclass(exp, Exception) 7000 ), 7001 None, 7002 ) 7003 if expected_exception is not None: 7004 with self.assertRaises( 7005 expected_exception=expected_exception, msg=fail_msg or msg 7006 ): 7007 if isinstance(result, Exception): 7008 raise result 7009 else: 7010 expected_list = next( 7011 (exp for exp in expected if isinstance(exp, list)), None 7012 ) 7013 expected_dict = next( 7014 (exp for exp in expected if isinstance(exp, dict)), None 7015 ) 7016 if (expected_list, expected_dict) != (None, None): 7017 self.assertParseResultsEquals( 7018 result, 7019 expected_list=expected_list, 7020 expected_dict=expected_dict, 7021 msg=fail_msg or msg, 7022 ) 7023 else: 7024 # warning here maybe? 7025 print("no validation for {!r}".format(test_string)) 7026 7027 # do this last, in case some specific test results can be reported instead 7028 self.assertTrue( 7029 run_test_success, msg=msg if msg is not None else "failed runTests" 7030 ) 7031 7032 @contextmanager 7033 def assertRaisesParseException(self, exc_type=ParseException, msg=None): 7034 with self.assertRaises(exc_type, msg=msg): 7035 yield 7036 7037 7038if __name__ == "__main__": 7039 7040 selectToken = CaselessLiteral("select") 7041 fromToken = CaselessLiteral("from") 7042 7043 ident = Word(alphas, alphanums + "_$") 7044 7045 columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) 7046 columnNameList = Group(delimitedList(columnName)).setName("columns") 7047 columnSpec = ('*' | columnNameList) 7048 7049 tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) 7050 tableNameList = Group(delimitedList(tableName)).setName("tables") 7051 7052 simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables") 7053 7054 # demo runTests method, including embedded comments in test string 7055 simpleSQL.runTests(""" 7056 # '*' as column list and dotted table name 7057 select * from SYS.XYZZY 7058 7059 # caseless match on "SELECT", and casts back to "select" 7060 SELECT * from XYZZY, ABC 7061 7062 # list of column names, and mixed case SELECT keyword 7063 Select AA,BB,CC from Sys.dual 7064 7065 # multiple tables 7066 Select A, B, C from Sys.dual, Table2 7067 7068 # invalid SELECT keyword - should fail 7069 Xelect A, B, C from Sys.dual 7070 7071 # incomplete command - should fail 7072 Select 7073 7074 # invalid column name - should fail 7075 Select ^^^ frox Sys.dual 7076 7077 """) 7078 7079 pyparsing_common.number.runTests(""" 7080 100 7081 -100 7082 +100 7083 3.14159 7084 6.02e23 7085 1e-12 7086 """) 7087 7088 # any int or real number, returned as float 7089 pyparsing_common.fnumber.runTests(""" 7090 100 7091 -100 7092 +100 7093 3.14159 7094 6.02e23 7095 1e-12 7096 """) 7097 7098 pyparsing_common.hex_integer.runTests(""" 7099 100 7100 FF 7101 """) 7102 7103 import uuid 7104 pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) 7105 pyparsing_common.uuid.runTests(""" 7106 12345678-1234-5678-1234-567812345678 7107 """) 7108