1# 2# core.py 3# 4import os 5from typing import ( 6 Optional as OptionalType, 7 Iterable as IterableType, 8 Union, 9 Callable, 10 Any, 11 Generator, 12 Tuple, 13 List, 14 TextIO, 15 Set, 16 Dict as DictType, 17) 18from abc import ABC, abstractmethod 19from enum import Enum 20import string 21import copy 22import warnings 23import re 24import sre_constants 25import sys 26from collections.abc import Iterable 27import traceback 28import types 29from operator import itemgetter 30from functools import wraps 31from threading import RLock 32from pathlib import Path 33 34from .util import ( 35 _FifoCache, 36 _UnboundedCache, 37 __config_flags, 38 _collapse_string_to_ranges, 39 _escape_regex_range_chars, 40 _bslash, 41 _flatten, 42 LRUMemo as _LRUMemo, 43 UnboundedMemo as _UnboundedMemo, 44) 45from .exceptions import * 46from .actions import * 47from .results import ParseResults, _ParseResultsWithOffset 48from .unicode import pyparsing_unicode 49 50_MAX_INT = sys.maxsize 51str_type: Tuple[type, ...] = (str, bytes) 52 53# 54# Copyright (c) 2003-2021 Paul T. McGuire 55# 56# Permission is hereby granted, free of charge, to any person obtaining 57# a copy of this software and associated documentation files (the 58# "Software"), to deal in the Software without restriction, including 59# without limitation the rights to use, copy, modify, merge, publish, 60# distribute, sublicense, and/or sell copies of the Software, and to 61# permit persons to whom the Software is furnished to do so, subject to 62# the following conditions: 63# 64# The above copyright notice and this permission notice shall be 65# included in all copies or substantial portions of the Software. 66# 67# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 68# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 69# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 70# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 71# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 72# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 73# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 74# 75 76 77class __compat__(__config_flags): 78 """ 79 A cross-version compatibility configuration for pyparsing features that will be 80 released in a future version. By setting values in this configuration to True, 81 those features can be enabled in prior versions for compatibility development 82 and testing. 83 84 - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping 85 of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`; 86 maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1 87 behavior 88 """ 89 90 _type_desc = "compatibility" 91 92 collect_all_And_tokens = True 93 94 _all_names = [__ for __ in locals() if not __.startswith("_")] 95 _fixed_names = """ 96 collect_all_And_tokens 97 """.split() 98 99 100class __diag__(__config_flags): 101 _type_desc = "diagnostic" 102 103 warn_multiple_tokens_in_named_alternation = False 104 warn_ungrouped_named_tokens_in_collection = False 105 warn_name_set_on_empty_Forward = False 106 warn_on_parse_using_empty_Forward = False 107 warn_on_assignment_to_Forward = False 108 warn_on_multiple_string_args_to_oneof = False 109 warn_on_match_first_with_lshift_operator = False 110 enable_debug_on_named_expressions = False 111 112 _all_names = [__ for __ in locals() if not __.startswith("_")] 113 _warning_names = [name for name in _all_names if name.startswith("warn")] 114 _debug_names = [name for name in _all_names if name.startswith("enable_debug")] 115 116 @classmethod 117 def enable_all_warnings(cls): 118 for name in cls._warning_names: 119 cls.enable(name) 120 121 122class Diagnostics(Enum): 123 """ 124 Diagnostic configuration (all default to disabled) 125 - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results 126 name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions 127 - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results 128 name is defined on a containing expression with ungrouped subexpressions that also 129 have results names 130 - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined 131 with a results name, but has no contents defined 132 - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is 133 defined in a grammar but has never had an expression attached to it 134 - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined 135 but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'`` 136 - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is 137 incorrectly called with multiple str arguments 138 - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent 139 calls to :class:`ParserElement.set_name` 140 141 Diagnostics are enabled/disabled by calling :class:`enable_diag` and :class:`disable_diag`. 142 All warnings can be enabled by calling :class:`enable_all_warnings`. 143 """ 144 145 warn_multiple_tokens_in_named_alternation = 0 146 warn_ungrouped_named_tokens_in_collection = 1 147 warn_name_set_on_empty_Forward = 2 148 warn_on_parse_using_empty_Forward = 3 149 warn_on_assignment_to_Forward = 4 150 warn_on_multiple_string_args_to_oneof = 5 151 warn_on_match_first_with_lshift_operator = 6 152 enable_debug_on_named_expressions = 7 153 154 155def enable_diag(diag_enum): 156 """ 157 Enable a global pyparsing diagnostic flag (see :class:`Diagnostics`). 158 """ 159 __diag__.enable(diag_enum.name) 160 161 162def disable_diag(diag_enum): 163 """ 164 Disable a global pyparsing diagnostic flag (see :class:`Diagnostics`). 165 """ 166 __diag__.disable(diag_enum.name) 167 168 169def enable_all_warnings(): 170 """ 171 Enable all global pyparsing diagnostic warnings (see :class:`Diagnostics`). 172 """ 173 __diag__.enable_all_warnings() 174 175 176# hide abstract class 177del __config_flags 178 179 180def _should_enable_warnings( 181 cmd_line_warn_options: List[str], warn_env_var: OptionalType[str] 182) -> bool: 183 enable = bool(warn_env_var) 184 for warn_opt in cmd_line_warn_options: 185 w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split( 186 ":" 187 )[:5] 188 if not w_action.lower().startswith("i") and ( 189 not (w_message or w_category or w_module) or w_module == "pyparsing" 190 ): 191 enable = True 192 elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""): 193 enable = False 194 return enable 195 196 197if _should_enable_warnings( 198 sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS") 199): 200 enable_all_warnings() 201 202 203# build list of single arg builtins, that can be used as parse actions 204_single_arg_builtins = { 205 sum, 206 len, 207 sorted, 208 reversed, 209 list, 210 tuple, 211 set, 212 any, 213 all, 214 min, 215 max, 216} 217 218_generatorType = types.GeneratorType 219ParseAction = Union[ 220 Callable[[], Any], 221 Callable[[ParseResults], Any], 222 Callable[[int, ParseResults], Any], 223 Callable[[str, int, ParseResults], Any], 224] 225ParseCondition = Union[ 226 Callable[[], bool], 227 Callable[[ParseResults], bool], 228 Callable[[int, ParseResults], bool], 229 Callable[[str, int, ParseResults], bool], 230] 231ParseFailAction = Callable[[str, int, "ParserElement", Exception], None] 232DebugStartAction = Callable[[str, int, "ParserElement", bool], None] 233DebugSuccessAction = Callable[ 234 [str, int, int, "ParserElement", ParseResults, bool], None 235] 236DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None] 237 238 239alphas = string.ascii_uppercase + string.ascii_lowercase 240identchars = pyparsing_unicode.Latin1.identchars 241identbodychars = pyparsing_unicode.Latin1.identbodychars 242nums = "0123456789" 243hexnums = nums + "ABCDEFabcdef" 244alphanums = alphas + nums 245printables = "".join(c for c in string.printable if c not in string.whitespace) 246 247_trim_arity_call_line = None 248 249 250def _trim_arity(func, maxargs=2): 251 """decorator to trim function calls to match the arity of the target""" 252 global _trim_arity_call_line 253 254 if func in _single_arg_builtins: 255 return lambda s, l, t: func(t) 256 257 limit = 0 258 found_arity = False 259 260 def extract_tb(tb, limit=0): 261 frames = traceback.extract_tb(tb, limit=limit) 262 frame_summary = frames[-1] 263 return [frame_summary[:2]] 264 265 # synthesize what would be returned by traceback.extract_stack at the call to 266 # user's parse action 'func', so that we don't incur call penalty at parse time 267 268 LINE_DIFF = 11 269 # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND 270 # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!! 271 _trim_arity_call_line = ( 272 _trim_arity_call_line or traceback.extract_stack(limit=2)[-1] 273 ) 274 pa_call_line_synth = ( 275 _trim_arity_call_line[0], 276 _trim_arity_call_line[1] + LINE_DIFF, 277 ) 278 279 def wrapper(*args): 280 nonlocal found_arity, limit 281 while 1: 282 try: 283 ret = func(*args[limit:]) 284 found_arity = True 285 return ret 286 except TypeError as te: 287 # re-raise TypeErrors if they did not come from our arity testing 288 if found_arity: 289 raise 290 else: 291 tb = te.__traceback__ 292 trim_arity_type_error = ( 293 extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth 294 ) 295 del tb 296 297 if trim_arity_type_error: 298 if limit <= maxargs: 299 limit += 1 300 continue 301 302 raise 303 304 # copy func name to wrapper for sensible debug output 305 # (can't use functools.wraps, since that messes with function signature) 306 func_name = getattr(func, "__name__", getattr(func, "__class__").__name__) 307 wrapper.__name__ = func_name 308 309 return wrapper 310 311 312def condition_as_parse_action( 313 fn: ParseCondition, message: str = None, fatal: bool = False 314): 315 """ 316 Function to convert a simple predicate function that returns ``True`` or ``False`` 317 into a parse action. Can be used in places when a parse action is required 318 and :class:`ParserElement.add_condition` cannot be used (such as when adding a condition 319 to an operator level in :class:`infix_notation`). 320 321 Optional keyword arguments: 322 323 - ``message`` - define a custom message to be used in the raised exception 324 - ``fatal`` - if True, will raise :class:`ParseFatalException` to stop parsing immediately; 325 otherwise will raise :class:`ParseException` 326 327 """ 328 msg = message if message is not None else "failed user-defined condition" 329 exc_type = ParseFatalException if fatal else ParseException 330 fn = _trim_arity(fn) 331 332 @wraps(fn) 333 def pa(s, l, t): 334 if not bool(fn(s, l, t)): 335 raise exc_type(s, l, msg) 336 337 return pa 338 339 340def _default_start_debug_action( 341 instring: str, loc: int, expr: "ParserElement", cache_hit: bool = False 342): 343 cache_hit_str = "*" if cache_hit else "" 344 print( 345 ( 346 "{}Match {} at loc {}({},{})\n {}\n {}^".format( 347 cache_hit_str, 348 expr, 349 loc, 350 lineno(loc, instring), 351 col(loc, instring), 352 line(loc, instring), 353 " " * (col(loc, instring) - 1), 354 ) 355 ) 356 ) 357 358 359def _default_success_debug_action( 360 instring: str, 361 startloc: int, 362 endloc: int, 363 expr: "ParserElement", 364 toks: ParseResults, 365 cache_hit: bool = False, 366): 367 cache_hit_str = "*" if cache_hit else "" 368 print("{}Matched {} -> {}".format(cache_hit_str, expr, toks.as_list())) 369 370 371def _default_exception_debug_action( 372 instring: str, 373 loc: int, 374 expr: "ParserElement", 375 exc: Exception, 376 cache_hit: bool = False, 377): 378 cache_hit_str = "*" if cache_hit else "" 379 print( 380 "{}Match {} failed, {} raised: {}".format( 381 cache_hit_str, expr, type(exc).__name__, exc 382 ) 383 ) 384 385 386def null_debug_action(*args): 387 """'Do-nothing' debug action, to suppress debugging output during parsing.""" 388 389 390class ParserElement(ABC): 391 """Abstract base level parser element class.""" 392 393 DEFAULT_WHITE_CHARS: str = " \n\t\r" 394 verbose_stacktrace: bool = False 395 _literalStringClass: OptionalType[type] = None 396 397 @staticmethod 398 def set_default_whitespace_chars(chars: str): 399 r""" 400 Overrides the default whitespace chars 401 402 Example:: 403 404 # default whitespace chars are space, <TAB> and newline 405 OneOrMore(Word(alphas)).parse_string("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl'] 406 407 # change to just treat newline as significant 408 ParserElement.set_default_whitespace_chars(" \t") 409 OneOrMore(Word(alphas)).parse_string("abc def\nghi jkl") # -> ['abc', 'def'] 410 """ 411 ParserElement.DEFAULT_WHITE_CHARS = chars 412 413 # update whitespace all parse expressions defined in this module 414 for expr in _builtin_exprs: 415 if expr.copyDefaultWhiteChars: 416 expr.whiteChars = set(chars) 417 418 @staticmethod 419 def inline_literals_using(cls: type): 420 """ 421 Set class to be used for inclusion of string literals into a parser. 422 423 Example:: 424 425 # default literal class used is Literal 426 integer = Word(nums) 427 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 428 429 date_str.parse_string("1999/12/31") # -> ['1999', '/', '12', '/', '31'] 430 431 432 # change to Suppress 433 ParserElement.inline_literals_using(Suppress) 434 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 435 436 date_str.parse_string("1999/12/31") # -> ['1999', '12', '31'] 437 """ 438 ParserElement._literalStringClass = cls 439 440 def __init__(self, savelist: bool = False): 441 self.parseAction: List[ParseAction] = list() 442 self.failAction: OptionalType[ParseFailAction] = None 443 self.customName = None 444 self._defaultName = None 445 self.resultsName = None 446 self.saveAsList = savelist 447 self.skipWhitespace = True 448 self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) 449 self.copyDefaultWhiteChars = True 450 # used when checking for left-recursion 451 self.mayReturnEmpty = False 452 self.keepTabs = False 453 self.ignoreExprs: List["ParserElement"] = list() 454 self.debug = False 455 self.streamlined = False 456 # optimize exception handling for subclasses that don't advance parse index 457 self.mayIndexError = True 458 self.errmsg = "" 459 # mark results names as modal (report only last) or cumulative (list all) 460 self.modalResults = True 461 # custom debug actions 462 self.debugActions: Tuple[ 463 OptionalType[DebugStartAction], 464 OptionalType[DebugSuccessAction], 465 OptionalType[DebugExceptionAction], 466 ] = (None, None, None) 467 self.re = None 468 # avoid redundant calls to preParse 469 self.callPreparse = True 470 self.callDuringTry = False 471 self.suppress_warnings_ = [] 472 473 def suppress_warning(self, warning_type: Diagnostics): 474 """ 475 Suppress warnings emitted for a particular diagnostic on this expression. 476 477 Example:: 478 479 base = pp.Forward() 480 base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward) 481 482 # statement would normally raise a warning, but is now suppressed 483 print(base.parseString("x")) 484 485 """ 486 self.suppress_warnings_.append(warning_type) 487 return self 488 489 def copy(self) -> "ParserElement": 490 """ 491 Make a copy of this :class:`ParserElement`. Useful for defining 492 different parse actions for the same parsing pattern, using copies of 493 the original parse element. 494 495 Example:: 496 497 integer = Word(nums).set_parse_action(lambda toks: int(toks[0])) 498 integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K") 499 integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M") 500 501 print(OneOrMore(integerK | integerM | integer).parse_string("5K 100 640K 256M")) 502 503 prints:: 504 505 [5120, 100, 655360, 268435456] 506 507 Equivalent form of ``expr.copy()`` is just ``expr()``:: 508 509 integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M") 510 """ 511 cpy = copy.copy(self) 512 cpy.parseAction = self.parseAction[:] 513 cpy.ignoreExprs = self.ignoreExprs[:] 514 if self.copyDefaultWhiteChars: 515 cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS) 516 return cpy 517 518 def set_results_name( 519 self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False 520 ) -> "ParserElement": 521 """ 522 Define name for referencing matching tokens as a nested attribute 523 of the returned parse results. 524 525 Normally, results names are assigned as you would assign keys in a dict: 526 any existing value is overwritten by later values. If it is necessary to 527 keep all values captured for a particular results name, call ``set_results_name`` 528 with ``list_all_matches`` = True. 529 530 NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object; 531 this is so that the client can define a basic element, such as an 532 integer, and reference it in multiple places with different names. 533 534 You can also set results names using the abbreviated syntax, 535 ``expr("name")`` in place of ``expr.set_results_name("name")`` 536 - see :class:`__call__`. If ``list_all_matches`` is required, use 537 ``expr("name*")``. 538 539 Example:: 540 541 date_str = (integer.set_results_name("year") + '/' 542 + integer.set_results_name("month") + '/' 543 + integer.set_results_name("day")) 544 545 # equivalent form: 546 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 547 """ 548 listAllMatches = listAllMatches or list_all_matches 549 return self._setResultsName(name, listAllMatches) 550 551 def _setResultsName(self, name, listAllMatches=False): 552 if name is None: 553 return self 554 newself = self.copy() 555 if name.endswith("*"): 556 name = name[:-1] 557 listAllMatches = True 558 newself.resultsName = name 559 newself.modalResults = not listAllMatches 560 return newself 561 562 def set_break(self, break_flag: bool = True) -> "ParserElement": 563 """ 564 Method to invoke the Python pdb debugger when this element is 565 about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to 566 disable. 567 """ 568 if break_flag: 569 _parseMethod = self._parse 570 571 def breaker(instring, loc, doActions=True, callPreParse=True): 572 import pdb 573 574 # this call to pdb.set_trace() is intentional, not a checkin error 575 pdb.set_trace() 576 return _parseMethod(instring, loc, doActions, callPreParse) 577 578 breaker._originalParseMethod = _parseMethod 579 self._parse = breaker 580 else: 581 if hasattr(self._parse, "_originalParseMethod"): 582 self._parse = self._parse._originalParseMethod 583 return self 584 585 def set_parse_action( 586 self, *fns: ParseAction, **kwargs 587 ) -> OptionalType["ParserElement"]: 588 """ 589 Define one or more actions to perform when successfully matching parse element definition. 590 591 Parse actions can be called to perform data conversions, do extra validation, 592 update external data structures, or enhance or replace the parsed tokens. 593 Each parse action ``fn`` is a callable method with 0-3 arguments, called as 594 ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where: 595 596 - s = the original string being parsed (see note below) 597 - loc = the location of the matching substring 598 - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object 599 600 The parsed tokens are passed to the parse action as ParseResults. They can be 601 modified in place using list-style append, extend, and pop operations to update 602 the parsed list elements; and with dictionary-style item set and del operations 603 to add, update, or remove any named results. If the tokens are modified in place, 604 it is not necessary to return them with a return statement. 605 606 Parse actions can also completely replace the given tokens, with another ``ParseResults`` 607 object, or with some entirely different object (common for parse actions that perform data 608 conversions). A convenient way to build a new parse result is to define the values 609 using a dict, and then create the return value using :class:`ParseResults.from_dict`. 610 611 If None is passed as the ``fn`` parse action, all previously added parse actions for this 612 expression are cleared. 613 614 Optional keyword arguments: 615 616 - call_during_try = (default= ``False``) indicate if parse action should be run during 617 lookaheads and alternate testing. For parse actions that have side effects, it is 618 important to only call the parse action once it is determined that it is being 619 called as part of a successful parse. For parse actions that perform additional 620 validation, then call_during_try should be passed as True, so that the validation 621 code is included in the preliminary "try" parses. 622 623 Note: the default parsing behavior is to expand tabs in the input string 624 before starting the parsing process. See :class:`parse_string` for more 625 information on parsing strings containing ``<TAB>`` s, and suggested 626 methods to maintain a consistent view of the parsed string, the parse 627 location, and line and column positions within the parsed string. 628 629 Example:: 630 631 # parse dates in the form YYYY/MM/DD 632 633 # use parse action to convert toks from str to int at parse time 634 def convert_to_int(toks): 635 return int(toks[0]) 636 637 # use a parse action to verify that the date is a valid date 638 def is_valid_date(toks): 639 from datetime import date 640 year, month, day = toks[::2] 641 try: 642 date(year, month, day) 643 except ValueError: 644 raise ParseException("invalid date given") 645 646 integer = Word(nums) 647 date_str = integer + '/' + integer + '/' + integer 648 649 # add parse actions 650 integer.set_parse_action(convert_to_int) 651 date_str.set_parse_action(is_valid_date) 652 653 # note that integer fields are now ints, not strings 654 date_str.run_tests(''' 655 # successful parse - note that integer fields were converted to ints 656 1999/12/31 657 658 # fail - invalid date 659 1999/13/31 660 ''') 661 """ 662 if list(fns) == [None]: 663 self.parseAction = [] 664 else: 665 if not all(callable(fn) for fn in fns): 666 raise TypeError("parse actions must be callable") 667 self.parseAction = list(map(_trim_arity, list(fns))) 668 self.callDuringTry = kwargs.get( 669 "call_during_try", kwargs.get("callDuringTry", False) 670 ) 671 return self 672 673 def add_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement": 674 """ 675 Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`. 676 677 See examples in :class:`copy`. 678 """ 679 self.parseAction += list(map(_trim_arity, list(fns))) 680 self.callDuringTry = self.callDuringTry or kwargs.get( 681 "call_during_try", kwargs.get("callDuringTry", False) 682 ) 683 return self 684 685 def add_condition(self, *fns: ParseCondition, **kwargs) -> "ParserElement": 686 """Add a boolean predicate function to expression's list of parse actions. See 687 :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``, 688 functions passed to ``add_condition`` need to return boolean success/fail of the condition. 689 690 Optional keyword arguments: 691 692 - message = define a custom message to be used in the raised exception 693 - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise 694 ParseException 695 - call_during_try = boolean to indicate if this method should be called during internal tryParse calls, 696 default=False 697 698 Example:: 699 700 integer = Word(nums).set_parse_action(lambda toks: int(toks[0])) 701 year_int = integer.copy() 702 year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later") 703 date_str = year_int + '/' + integer + '/' + integer 704 705 result = date_str.parse_string("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), 706 (line:1, col:1) 707 """ 708 for fn in fns: 709 self.parseAction.append( 710 condition_as_parse_action( 711 fn, message=kwargs.get("message"), fatal=kwargs.get("fatal", False) 712 ) 713 ) 714 715 self.callDuringTry = self.callDuringTry or kwargs.get( 716 "call_during_try", kwargs.get("callDuringTry", False) 717 ) 718 return self 719 720 def set_fail_action(self, fn: ParseFailAction) -> "ParserElement": 721 """ 722 Define action to perform if parsing fails at this expression. 723 Fail acton fn is a callable function that takes the arguments 724 ``fn(s, loc, expr, err)`` where: 725 726 - s = string being parsed 727 - loc = location where expression match was attempted and failed 728 - expr = the parse expression that failed 729 - err = the exception thrown 730 731 The function returns no value. It may throw :class:`ParseFatalException` 732 if it is desired to stop parsing immediately.""" 733 self.failAction = fn 734 return self 735 736 def _skipIgnorables(self, instring, loc): 737 exprsFound = True 738 while exprsFound: 739 exprsFound = False 740 for e in self.ignoreExprs: 741 try: 742 while 1: 743 loc, dummy = e._parse(instring, loc) 744 exprsFound = True 745 except ParseException: 746 pass 747 return loc 748 749 def preParse(self, instring, loc): 750 if self.ignoreExprs: 751 loc = self._skipIgnorables(instring, loc) 752 753 if self.skipWhitespace: 754 instrlen = len(instring) 755 white_chars = self.whiteChars 756 while loc < instrlen and instring[loc] in white_chars: 757 loc += 1 758 759 return loc 760 761 def parseImpl(self, instring, loc, doActions=True): 762 return loc, [] 763 764 def postParse(self, instring, loc, tokenlist): 765 return tokenlist 766 767 # @profile 768 def _parseNoCache( 769 self, instring, loc, doActions=True, callPreParse=True 770 ) -> Tuple[int, ParseResults]: 771 TRY, MATCH, FAIL = 0, 1, 2 772 debugging = self.debug # and doActions) 773 len_instring = len(instring) 774 775 if debugging or self.failAction: 776 # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring))) 777 try: 778 if callPreParse and self.callPreparse: 779 pre_loc = self.preParse(instring, loc) 780 else: 781 pre_loc = loc 782 tokens_start = pre_loc 783 if self.debugActions[TRY]: 784 self.debugActions[TRY](instring, tokens_start, self) 785 if self.mayIndexError or pre_loc >= len_instring: 786 try: 787 loc, tokens = self.parseImpl(instring, pre_loc, doActions) 788 except IndexError: 789 raise ParseException(instring, len_instring, self.errmsg, self) 790 else: 791 loc, tokens = self.parseImpl(instring, pre_loc, doActions) 792 except Exception as err: 793 # print("Exception raised:", err) 794 if self.debugActions[FAIL]: 795 self.debugActions[FAIL](instring, tokens_start, self, err) 796 if self.failAction: 797 self.failAction(instring, tokens_start, self, err) 798 raise 799 else: 800 if callPreParse and self.callPreparse: 801 pre_loc = self.preParse(instring, loc) 802 else: 803 pre_loc = loc 804 tokens_start = pre_loc 805 if self.mayIndexError or pre_loc >= len_instring: 806 try: 807 loc, tokens = self.parseImpl(instring, pre_loc, doActions) 808 except IndexError: 809 raise ParseException(instring, len_instring, self.errmsg, self) 810 else: 811 loc, tokens = self.parseImpl(instring, pre_loc, doActions) 812 813 tokens = self.postParse(instring, loc, tokens) 814 815 ret_tokens = ParseResults( 816 tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults 817 ) 818 if self.parseAction and (doActions or self.callDuringTry): 819 if debugging: 820 try: 821 for fn in self.parseAction: 822 try: 823 tokens = fn(instring, tokens_start, ret_tokens) 824 except IndexError as parse_action_exc: 825 exc = ParseException("exception raised in parse action") 826 raise exc from parse_action_exc 827 828 if tokens is not None and tokens is not ret_tokens: 829 ret_tokens = ParseResults( 830 tokens, 831 self.resultsName, 832 asList=self.saveAsList 833 and isinstance(tokens, (ParseResults, list)), 834 modal=self.modalResults, 835 ) 836 except Exception as err: 837 # print "Exception raised in user parse action:", err 838 if self.debugActions[FAIL]: 839 self.debugActions[FAIL](instring, tokens_start, self, err) 840 raise 841 else: 842 for fn in self.parseAction: 843 try: 844 tokens = fn(instring, tokens_start, ret_tokens) 845 except IndexError as parse_action_exc: 846 exc = ParseException("exception raised in parse action") 847 raise exc from parse_action_exc 848 849 if tokens is not None and tokens is not ret_tokens: 850 ret_tokens = ParseResults( 851 tokens, 852 self.resultsName, 853 asList=self.saveAsList 854 and isinstance(tokens, (ParseResults, list)), 855 modal=self.modalResults, 856 ) 857 if debugging: 858 # print("Matched", self, "->", ret_tokens.as_list()) 859 if self.debugActions[MATCH]: 860 self.debugActions[MATCH](instring, tokens_start, loc, self, ret_tokens) 861 862 return loc, ret_tokens 863 864 def try_parse(self, instring: str, loc: int, raise_fatal: bool = False) -> int: 865 try: 866 return self._parse(instring, loc, doActions=False)[0] 867 except ParseFatalException: 868 if raise_fatal: 869 raise 870 raise ParseException(instring, loc, self.errmsg, self) 871 872 def can_parse_next(self, instring: str, loc: int) -> bool: 873 try: 874 self.try_parse(instring, loc) 875 except (ParseException, IndexError): 876 return False 877 else: 878 return True 879 880 # cache for left-recursion in Forward references 881 recursion_lock = RLock() 882 recursion_memos: DictType[ 883 Tuple[int, "Forward", bool], Tuple[int, Union[ParseResults, Exception]] 884 ] = {} 885 886 # argument cache for optimizing repeated calls when backtracking through recursive expressions 887 packrat_cache = ( 888 {} 889 ) # this is set later by enabled_packrat(); this is here so that reset_cache() doesn't fail 890 packrat_cache_lock = RLock() 891 packrat_cache_stats = [0, 0] 892 893 # this method gets repeatedly called during backtracking with the same arguments - 894 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression 895 def _parseCache( 896 self, instring, loc, doActions=True, callPreParse=True 897 ) -> Tuple[int, ParseResults]: 898 HIT, MISS = 0, 1 899 TRY, MATCH, FAIL = 0, 1, 2 900 lookup = (self, instring, loc, callPreParse, doActions) 901 with ParserElement.packrat_cache_lock: 902 cache = ParserElement.packrat_cache 903 value = cache.get(lookup) 904 if value is cache.not_in_cache: 905 ParserElement.packrat_cache_stats[MISS] += 1 906 try: 907 value = self._parseNoCache(instring, loc, doActions, callPreParse) 908 except ParseBaseException as pe: 909 # cache a copy of the exception, without the traceback 910 cache.set(lookup, pe.__class__(*pe.args)) 911 raise 912 else: 913 cache.set(lookup, (value[0], value[1].copy(), loc)) 914 return value 915 else: 916 ParserElement.packrat_cache_stats[HIT] += 1 917 if self.debug and self.debugActions[TRY]: 918 try: 919 self.debugActions[TRY](instring, loc, self, cache_hit=True) 920 except TypeError: 921 pass 922 if isinstance(value, Exception): 923 if self.debug and self.debugActions[FAIL]: 924 try: 925 self.debugActions[FAIL]( 926 instring, loc, self, value, cache_hit=True 927 ) 928 except TypeError: 929 pass 930 raise value 931 932 loc_, result, endloc = value[0], value[1].copy(), value[2] 933 if self.debug and self.debugActions[MATCH]: 934 try: 935 self.debugActions[MATCH]( 936 instring, loc_, endloc, self, result, cache_hit=True 937 ) 938 except TypeError: 939 pass 940 941 return loc_, result 942 943 _parse = _parseNoCache 944 945 @staticmethod 946 def reset_cache() -> None: 947 ParserElement.packrat_cache.clear() 948 ParserElement.packrat_cache_stats[:] = [0] * len( 949 ParserElement.packrat_cache_stats 950 ) 951 ParserElement.recursion_memos.clear() 952 953 _packratEnabled = False 954 _left_recursion_enabled = False 955 956 @staticmethod 957 def disable_memoization() -> None: 958 """ 959 Disables active Packrat or Left Recursion parsing and their memoization 960 961 This method also works if neither Packrat nor Left Recursion are enabled. 962 This makes it safe to call before activating Packrat nor Left Recursion 963 to clear any previous settings. 964 """ 965 ParserElement.reset_cache() 966 ParserElement._left_recursion_enabled = False 967 ParserElement._packratEnabled = False 968 ParserElement._parse = ParserElement._parseNoCache 969 970 @staticmethod 971 def enable_left_recursion( 972 cache_size_limit: OptionalType[int] = None, *, force=False 973 ) -> None: 974 """ 975 Enables "bounded recursion" parsing, which allows for both direct and indirect 976 left-recursion. During parsing, left-recursive :class:`Forward` elements are 977 repeatedly matched with a fixed recursion depth that is gradually increased 978 until finding the longest match. 979 980 Example:: 981 982 import pyparsing as pp 983 pp.ParserElement.enable_left_recursion() 984 985 E = pp.Forward("E") 986 num = pp.Word(pp.nums) 987 # match `num`, or `num '+' num`, or `num '+' num '+' num`, ... 988 E <<= E + '+' - num | num 989 990 print(E.parse_string("1+2+3")) 991 992 Recursion search naturally memoizes matches of ``Forward`` elements and may 993 thus skip reevaluation of parse actions during backtracking. This may break 994 programs with parse actions which rely on strict ordering of side-effects. 995 996 Parameters: 997 998 - cache_size_limit - (default=``None``) - memoize at most this many 999 ``Forward`` elements during matching; if ``None`` (the default), 1000 memoize all ``Forward`` elements. 1001 1002 Bounded Recursion parsing works similar but not identical to Packrat parsing, 1003 thus the two cannot be used together. Use ``force=True`` to disable any 1004 previous, conflicting settings. 1005 """ 1006 if force: 1007 ParserElement.disable_memoization() 1008 elif ParserElement._packratEnabled: 1009 raise RuntimeError("Packrat and Bounded Recursion are not compatible") 1010 if cache_size_limit is None: 1011 ParserElement.recursion_memos = _UnboundedMemo() 1012 elif cache_size_limit > 0: 1013 ParserElement.recursion_memos = _LRUMemo(capacity=cache_size_limit) 1014 else: 1015 raise NotImplementedError("Memo size of %s" % cache_size_limit) 1016 ParserElement._left_recursion_enabled = True 1017 1018 @staticmethod 1019 def enable_packrat(cache_size_limit: int = 128, *, force: bool = False) -> None: 1020 """ 1021 Enables "packrat" parsing, which adds memoizing to the parsing logic. 1022 Repeated parse attempts at the same string location (which happens 1023 often in many complex grammars) can immediately return a cached value, 1024 instead of re-executing parsing/validating code. Memoizing is done of 1025 both valid results and parsing exceptions. 1026 1027 Parameters: 1028 1029 - cache_size_limit - (default= ``128``) - if an integer value is provided 1030 will limit the size of the packrat cache; if None is passed, then 1031 the cache size will be unbounded; if 0 is passed, the cache will 1032 be effectively disabled. 1033 1034 This speedup may break existing programs that use parse actions that 1035 have side-effects. For this reason, packrat parsing is disabled when 1036 you first import pyparsing. To activate the packrat feature, your 1037 program must call the class method :class:`ParserElement.enable_packrat`. 1038 For best results, call ``enable_packrat()`` immediately after 1039 importing pyparsing. 1040 1041 Example:: 1042 1043 import pyparsing 1044 pyparsing.ParserElement.enable_packrat() 1045 1046 Packrat parsing works similar but not identical to Bounded Recursion parsing, 1047 thus the two cannot be used together. Use ``force=True`` to disable any 1048 previous, conflicting settings. 1049 """ 1050 if force: 1051 ParserElement.disable_memoization() 1052 elif ParserElement._left_recursion_enabled: 1053 raise RuntimeError("Packrat and Bounded Recursion are not compatible") 1054 if not ParserElement._packratEnabled: 1055 ParserElement._packratEnabled = True 1056 if cache_size_limit is None: 1057 ParserElement.packrat_cache = _UnboundedCache() 1058 else: 1059 ParserElement.packrat_cache = _FifoCache(cache_size_limit) 1060 ParserElement._parse = ParserElement._parseCache 1061 1062 def parse_string( 1063 self, instring: str, parse_all: bool = False, *, parseAll: bool = False 1064 ) -> ParseResults: 1065 """ 1066 Parse a string with respect to the parser definition. This function is intended as the primary interface to the 1067 client code. 1068 1069 :param instring: The input string to be parsed. 1070 :param parse_all: If set, the entire input string must match the grammar. 1071 :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release. 1072 :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar. 1073 :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or 1074 an object with attributes if the given parser includes results names. 1075 1076 If the input string is required to match the entire grammar, ``parse_all`` flag must be set to ``True``. This 1077 is also equivalent to ending the grammar with :class:`StringEnd`(). 1078 1079 To report proper column numbers, ``parse_string`` operates on a copy of the input string where all tabs are 1080 converted to spaces (8 spaces per tab, as per the default in ``string.expandtabs``). If the input string 1081 contains tabs and the grammar uses parse actions that use the ``loc`` argument to index into the string 1082 being parsed, one can ensure a consistent view of the input string by doing one of the following: 1083 1084 - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`), 1085 - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the 1086 parse action's ``s`` argument, or 1087 - explicitly expand the tabs in your input string before calling ``parse_string``. 1088 1089 Examples: 1090 1091 By default, partial matches are OK. 1092 1093 >>> res = Word('a').parse_string('aaaaabaaa') 1094 >>> print(res) 1095 ['aaaaa'] 1096 1097 The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children 1098 directly to see more examples. 1099 1100 It raises an exception if parse_all flag is set and instring does not match the whole grammar. 1101 1102 >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True) 1103 Traceback (most recent call last): 1104 ... 1105 pyparsing.ParseException: Expected end of text, found 'b' (at char 5), (line:1, col:6) 1106 """ 1107 parseAll = parse_all or parseAll 1108 1109 ParserElement.reset_cache() 1110 if not self.streamlined: 1111 self.streamline() 1112 for e in self.ignoreExprs: 1113 e.streamline() 1114 if not self.keepTabs: 1115 instring = instring.expandtabs() 1116 try: 1117 loc, tokens = self._parse(instring, 0) 1118 if parseAll: 1119 loc = self.preParse(instring, loc) 1120 se = Empty() + StringEnd() 1121 se._parse(instring, loc) 1122 except ParseBaseException as exc: 1123 if ParserElement.verbose_stacktrace: 1124 raise 1125 else: 1126 # catch and re-raise exception from here, clearing out pyparsing internal stack trace 1127 raise exc.with_traceback(None) 1128 else: 1129 return tokens 1130 1131 def scan_string( 1132 self, 1133 instring: str, 1134 max_matches: int = _MAX_INT, 1135 overlap: bool = False, 1136 *, 1137 debug: bool = False, 1138 maxMatches: int = _MAX_INT, 1139 ) -> Generator[Tuple[ParseResults, int, int], None, None]: 1140 """ 1141 Scan the input string for expression matches. Each match will return the 1142 matching tokens, start location, and end location. May be called with optional 1143 ``max_matches`` argument, to clip scanning after 'n' matches are found. If 1144 ``overlap`` is specified, then overlapping matches will be reported. 1145 1146 Note that the start and end locations are reported relative to the string 1147 being parsed. See :class:`parse_string` for more information on parsing 1148 strings with embedded tabs. 1149 1150 Example:: 1151 1152 source = "sldjf123lsdjjkf345sldkjf879lkjsfd987" 1153 print(source) 1154 for tokens, start, end in Word(alphas).scan_string(source): 1155 print(' '*start + '^'*(end-start)) 1156 print(' '*start + tokens[0]) 1157 1158 prints:: 1159 1160 sldjf123lsdjjkf345sldkjf879lkjsfd987 1161 ^^^^^ 1162 sldjf 1163 ^^^^^^^ 1164 lsdjjkf 1165 ^^^^^^ 1166 sldkjf 1167 ^^^^^^ 1168 lkjsfd 1169 """ 1170 maxMatches = min(maxMatches, max_matches) 1171 if not self.streamlined: 1172 self.streamline() 1173 for e in self.ignoreExprs: 1174 e.streamline() 1175 1176 if not self.keepTabs: 1177 instring = str(instring).expandtabs() 1178 instrlen = len(instring) 1179 loc = 0 1180 preparseFn = self.preParse 1181 parseFn = self._parse 1182 ParserElement.resetCache() 1183 matches = 0 1184 try: 1185 while loc <= instrlen and matches < maxMatches: 1186 try: 1187 preloc = preparseFn(instring, loc) 1188 nextLoc, tokens = parseFn(instring, preloc, callPreParse=False) 1189 except ParseException: 1190 loc = preloc + 1 1191 else: 1192 if nextLoc > loc: 1193 matches += 1 1194 if debug: 1195 print( 1196 { 1197 "tokens": tokens.asList(), 1198 "start": preloc, 1199 "end": nextLoc, 1200 } 1201 ) 1202 yield tokens, preloc, nextLoc 1203 if overlap: 1204 nextloc = preparseFn(instring, loc) 1205 if nextloc > loc: 1206 loc = nextLoc 1207 else: 1208 loc += 1 1209 else: 1210 loc = nextLoc 1211 else: 1212 loc = preloc + 1 1213 except ParseBaseException as exc: 1214 if ParserElement.verbose_stacktrace: 1215 raise 1216 else: 1217 # catch and re-raise exception from here, clears out pyparsing internal stack trace 1218 raise exc.with_traceback(None) 1219 1220 def transform_string(self, instring: str, *, debug: bool = False) -> str: 1221 """ 1222 Extension to :class:`scan_string`, to modify matching text with modified tokens that may 1223 be returned from a parse action. To use ``transform_string``, define a grammar and 1224 attach a parse action to it that modifies the returned token list. 1225 Invoking ``transform_string()`` on a target string will then scan for matches, 1226 and replace the matched text patterns according to the logic in the parse 1227 action. ``transform_string()`` returns the resulting transformed string. 1228 1229 Example:: 1230 1231 wd = Word(alphas) 1232 wd.set_parse_action(lambda toks: toks[0].title()) 1233 1234 print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york.")) 1235 1236 prints:: 1237 1238 Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York. 1239 """ 1240 out = [] 1241 lastE = 0 1242 # force preservation of <TAB>s, to minimize unwanted transformation of string, and to 1243 # keep string locs straight between transform_string and scan_string 1244 self.keepTabs = True 1245 try: 1246 for t, s, e in self.scan_string(instring, debug=debug): 1247 out.append(instring[lastE:s]) 1248 if t: 1249 if isinstance(t, ParseResults): 1250 out += t.as_list() 1251 elif isinstance(t, Iterable) and not isinstance(t, str_type): 1252 out += list(t) 1253 else: 1254 out.append(t) 1255 lastE = e 1256 out.append(instring[lastE:]) 1257 out = [o for o in out if o] 1258 return "".join(map(str, _flatten(out))) 1259 except ParseBaseException as exc: 1260 if ParserElement.verbose_stacktrace: 1261 raise 1262 else: 1263 # catch and re-raise exception from here, clears out pyparsing internal stack trace 1264 raise exc.with_traceback(None) 1265 1266 def search_string( 1267 self, 1268 instring: str, 1269 max_matches: int = _MAX_INT, 1270 *, 1271 debug: bool = False, 1272 maxMatches: int = _MAX_INT, 1273 ) -> ParseResults: 1274 """ 1275 Another extension to :class:`scan_string`, simplifying the access to the tokens found 1276 to match the given parse expression. May be called with optional 1277 ``max_matches`` argument, to clip searching after 'n' matches are found. 1278 1279 Example:: 1280 1281 # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters 1282 cap_word = Word(alphas.upper(), alphas.lower()) 1283 1284 print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")) 1285 1286 # the sum() builtin can be used to merge results into a single ParseResults object 1287 print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))) 1288 1289 prints:: 1290 1291 [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']] 1292 ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity'] 1293 """ 1294 maxMatches = min(maxMatches, max_matches) 1295 try: 1296 return ParseResults( 1297 [t for t, s, e in self.scan_string(instring, maxMatches, debug=debug)] 1298 ) 1299 except ParseBaseException as exc: 1300 if ParserElement.verbose_stacktrace: 1301 raise 1302 else: 1303 # catch and re-raise exception from here, clears out pyparsing internal stack trace 1304 raise exc.with_traceback(None) 1305 1306 def split( 1307 self, 1308 instring: str, 1309 maxsplit: int = _MAX_INT, 1310 include_separators: bool = False, 1311 *, 1312 includeSeparators=False, 1313 ) -> Generator[str, None, None]: 1314 """ 1315 Generator method to split a string using the given expression as a separator. 1316 May be called with optional ``maxsplit`` argument, to limit the number of splits; 1317 and the optional ``include_separators`` argument (default= ``False``), if the separating 1318 matching text should be included in the split results. 1319 1320 Example:: 1321 1322 punc = one_of(list(".,;:/-!?")) 1323 print(list(punc.split("This, this?, this sentence, is badly punctuated!"))) 1324 1325 prints:: 1326 1327 ['This', ' this', '', ' this sentence', ' is badly punctuated', ''] 1328 """ 1329 includeSeparators = includeSeparators or include_separators 1330 last = 0 1331 for t, s, e in self.scan_string(instring, max_matches=maxsplit): 1332 yield instring[last:s] 1333 if includeSeparators: 1334 yield t[0] 1335 last = e 1336 yield instring[last:] 1337 1338 def __add__(self, other): 1339 """ 1340 Implementation of ``+`` operator - returns :class:`And`. Adding strings to a :class:`ParserElement` 1341 converts them to :class:`Literal`s by default. 1342 1343 Example:: 1344 1345 greet = Word(alphas) + "," + Word(alphas) + "!" 1346 hello = "Hello, World!" 1347 print(hello, "->", greet.parse_string(hello)) 1348 1349 prints:: 1350 1351 Hello, World! -> ['Hello', ',', 'World', '!'] 1352 1353 ``...`` may be used as a parse expression as a short form of :class:`SkipTo`. 1354 1355 Literal('start') + ... + Literal('end') 1356 1357 is equivalent to: 1358 1359 Literal('start') + SkipTo('end')("_skipped*") + Literal('end') 1360 1361 Note that the skipped text is returned with '_skipped' as a results name, 1362 and to support having multiple skips in the same parser, the value returned is 1363 a list of all skipped text. 1364 """ 1365 if other is Ellipsis: 1366 return _PendingSkip(self) 1367 1368 if isinstance(other, str_type): 1369 other = self._literalStringClass(other) 1370 if not isinstance(other, ParserElement): 1371 raise TypeError( 1372 "Cannot combine element of type {} with ParserElement".format( 1373 type(other).__name__ 1374 ) 1375 ) 1376 return And([self, other]) 1377 1378 def __radd__(self, other): 1379 """ 1380 Implementation of ``+`` operator when left operand is not a :class:`ParserElement` 1381 """ 1382 if other is Ellipsis: 1383 return SkipTo(self)("_skipped*") + self 1384 1385 if isinstance(other, str_type): 1386 other = self._literalStringClass(other) 1387 if not isinstance(other, ParserElement): 1388 raise TypeError( 1389 "Cannot combine element of type {} with ParserElement".format( 1390 type(other).__name__ 1391 ) 1392 ) 1393 return other + self 1394 1395 def __sub__(self, other): 1396 """ 1397 Implementation of ``-`` operator, returns :class:`And` with error stop 1398 """ 1399 if isinstance(other, str_type): 1400 other = self._literalStringClass(other) 1401 if not isinstance(other, ParserElement): 1402 raise TypeError( 1403 "Cannot combine element of type {} with ParserElement".format( 1404 type(other).__name__ 1405 ) 1406 ) 1407 return self + And._ErrorStop() + other 1408 1409 def __rsub__(self, other): 1410 """ 1411 Implementation of ``-`` operator when left operand is not a :class:`ParserElement` 1412 """ 1413 if isinstance(other, str_type): 1414 other = self._literalStringClass(other) 1415 if not isinstance(other, ParserElement): 1416 raise TypeError( 1417 "Cannot combine element of type {} with ParserElement".format( 1418 type(other).__name__ 1419 ) 1420 ) 1421 return other - self 1422 1423 def __mul__(self, other): 1424 """ 1425 Implementation of ``*`` operator, allows use of ``expr * 3`` in place of 1426 ``expr + expr + expr``. Expressions may also be multiplied by a 2-integer 1427 tuple, similar to ``{min, max}`` multipliers in regular expressions. Tuples 1428 may also include ``None`` as in: 1429 - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent 1430 to ``expr*n + ZeroOrMore(expr)`` 1431 (read as "at least n instances of ``expr``") 1432 - ``expr*(None, n)`` is equivalent to ``expr*(0, n)`` 1433 (read as "0 to n instances of ``expr``") 1434 - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)`` 1435 - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)`` 1436 1437 Note that ``expr*(None, n)`` does not raise an exception if 1438 more than n exprs exist in the input stream; that is, 1439 ``expr*(None, n)`` does not enforce a maximum number of expr 1440 occurrences. If this behavior is desired, then write 1441 ``expr*(None, n) + ~expr`` 1442 """ 1443 if other is Ellipsis: 1444 other = (0, None) 1445 elif isinstance(other, tuple) and other[:1] == (Ellipsis,): 1446 other = ((0,) + other[1:] + (None,))[:2] 1447 1448 if isinstance(other, int): 1449 minElements, optElements = other, 0 1450 elif isinstance(other, tuple): 1451 other = tuple(o if o is not Ellipsis else None for o in other) 1452 other = (other + (None, None))[:2] 1453 if other[0] is None: 1454 other = (0, other[1]) 1455 if isinstance(other[0], int) and other[1] is None: 1456 if other[0] == 0: 1457 return ZeroOrMore(self) 1458 if other[0] == 1: 1459 return OneOrMore(self) 1460 else: 1461 return self * other[0] + ZeroOrMore(self) 1462 elif isinstance(other[0], int) and isinstance(other[1], int): 1463 minElements, optElements = other 1464 optElements -= minElements 1465 else: 1466 raise TypeError( 1467 "cannot multiply ParserElement and ({}) objects".format( 1468 ",".join(type(item).__name__ for item in other) 1469 ) 1470 ) 1471 else: 1472 raise TypeError( 1473 "cannot multiply ParserElement and {} objects".format( 1474 type(other).__name__ 1475 ) 1476 ) 1477 1478 if minElements < 0: 1479 raise ValueError("cannot multiply ParserElement by negative value") 1480 if optElements < 0: 1481 raise ValueError( 1482 "second tuple value must be greater or equal to first tuple value" 1483 ) 1484 if minElements == optElements == 0: 1485 return And([]) 1486 1487 if optElements: 1488 1489 def makeOptionalList(n): 1490 if n > 1: 1491 return Opt(self + makeOptionalList(n - 1)) 1492 else: 1493 return Opt(self) 1494 1495 if minElements: 1496 if minElements == 1: 1497 ret = self + makeOptionalList(optElements) 1498 else: 1499 ret = And([self] * minElements) + makeOptionalList(optElements) 1500 else: 1501 ret = makeOptionalList(optElements) 1502 else: 1503 if minElements == 1: 1504 ret = self 1505 else: 1506 ret = And([self] * minElements) 1507 return ret 1508 1509 def __rmul__(self, other): 1510 return self.__mul__(other) 1511 1512 def __or__(self, other): 1513 """ 1514 Implementation of ``|`` operator - returns :class:`MatchFirst` 1515 """ 1516 if other is Ellipsis: 1517 return _PendingSkip(self, must_skip=True) 1518 1519 if isinstance(other, str_type): 1520 other = self._literalStringClass(other) 1521 if not isinstance(other, ParserElement): 1522 raise TypeError( 1523 "Cannot combine element of type {} with ParserElement".format( 1524 type(other).__name__ 1525 ) 1526 ) 1527 return MatchFirst([self, other]) 1528 1529 def __ror__(self, other): 1530 """ 1531 Implementation of ``|`` operator when left operand is not a :class:`ParserElement` 1532 """ 1533 if isinstance(other, str_type): 1534 other = self._literalStringClass(other) 1535 if not isinstance(other, ParserElement): 1536 raise TypeError( 1537 "Cannot combine element of type {} with ParserElement".format( 1538 type(other).__name__ 1539 ) 1540 ) 1541 return other | self 1542 1543 def __xor__(self, other): 1544 """ 1545 Implementation of ``^`` operator - returns :class:`Or` 1546 """ 1547 if isinstance(other, str_type): 1548 other = self._literalStringClass(other) 1549 if not isinstance(other, ParserElement): 1550 raise TypeError( 1551 "Cannot combine element of type {} with ParserElement".format( 1552 type(other).__name__ 1553 ) 1554 ) 1555 return Or([self, other]) 1556 1557 def __rxor__(self, other): 1558 """ 1559 Implementation of ``^`` operator when left operand is not a :class:`ParserElement` 1560 """ 1561 if isinstance(other, str_type): 1562 other = self._literalStringClass(other) 1563 if not isinstance(other, ParserElement): 1564 raise TypeError( 1565 "Cannot combine element of type {} with ParserElement".format( 1566 type(other).__name__ 1567 ) 1568 ) 1569 return other ^ self 1570 1571 def __and__(self, other): 1572 """ 1573 Implementation of ``&`` operator - returns :class:`Each` 1574 """ 1575 if isinstance(other, str_type): 1576 other = self._literalStringClass(other) 1577 if not isinstance(other, ParserElement): 1578 raise TypeError( 1579 "Cannot combine element of type {} with ParserElement".format( 1580 type(other).__name__ 1581 ) 1582 ) 1583 return Each([self, other]) 1584 1585 def __rand__(self, other): 1586 """ 1587 Implementation of ``&`` operator when left operand is not a :class:`ParserElement` 1588 """ 1589 if isinstance(other, str_type): 1590 other = self._literalStringClass(other) 1591 if not isinstance(other, ParserElement): 1592 raise TypeError( 1593 "Cannot combine element of type {} with ParserElement".format( 1594 type(other).__name__ 1595 ) 1596 ) 1597 return other & self 1598 1599 def __invert__(self): 1600 """ 1601 Implementation of ``~`` operator - returns :class:`NotAny` 1602 """ 1603 return NotAny(self) 1604 1605 # disable __iter__ to override legacy use of sequential access to __getitem__ to 1606 # iterate over a sequence 1607 __iter__ = None 1608 1609 def __getitem__(self, key): 1610 """ 1611 use ``[]`` indexing notation as a short form for expression repetition: 1612 1613 - ``expr[n]`` is equivalent to ``expr*n`` 1614 - ``expr[m, n]`` is equivalent to ``expr*(m, n)`` 1615 - ``expr[n, ...]`` or ``expr[n,]`` is equivalent 1616 to ``expr*n + ZeroOrMore(expr)`` 1617 (read as "at least n instances of ``expr``") 1618 - ``expr[..., n]`` is equivalent to ``expr*(0, n)`` 1619 (read as "0 to n instances of ``expr``") 1620 - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)`` 1621 - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)`` 1622 1623 ``None`` may be used in place of ``...``. 1624 1625 Note that ``expr[..., n]`` and ``expr[m, n]``do not raise an exception 1626 if more than ``n`` ``expr``s exist in the input stream. If this behavior is 1627 desired, then write ``expr[..., n] + ~expr``. 1628 """ 1629 1630 # convert single arg keys to tuples 1631 try: 1632 if isinstance(key, str_type): 1633 key = (key,) 1634 iter(key) 1635 except TypeError: 1636 key = (key, key) 1637 1638 if len(key) > 2: 1639 raise TypeError( 1640 "only 1 or 2 index arguments supported ({}{})".format( 1641 key[:5], "... [{}]".format(len(key)) if len(key) > 5 else "" 1642 ) 1643 ) 1644 1645 # clip to 2 elements 1646 ret = self * tuple(key[:2]) 1647 return ret 1648 1649 def __call__(self, name: str = None): 1650 """ 1651 Shortcut for :class:`set_results_name`, with ``list_all_matches=False``. 1652 1653 If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be 1654 passed as ``True``. 1655 1656 If ``name` is omitted, same as calling :class:`copy`. 1657 1658 Example:: 1659 1660 # these are equivalent 1661 userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno") 1662 userdata = Word(alphas)("name") + Word(nums + "-")("socsecno") 1663 """ 1664 if name is not None: 1665 return self._setResultsName(name) 1666 else: 1667 return self.copy() 1668 1669 def suppress(self) -> "ParserElement": 1670 """ 1671 Suppresses the output of this :class:`ParserElement`; useful to keep punctuation from 1672 cluttering up returned output. 1673 """ 1674 return Suppress(self) 1675 1676 def ignore_whitespace(self, recursive: bool = True) -> "ParserElement": 1677 """ 1678 Enables the skipping of whitespace before matching the characters in the 1679 :class:`ParserElement`'s defined pattern. 1680 1681 :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any) 1682 """ 1683 self.skipWhitespace = True 1684 return self 1685 1686 def leave_whitespace(self, recursive: bool = True) -> "ParserElement": 1687 """ 1688 Disables the skipping of whitespace before matching the characters in the 1689 :class:`ParserElement`'s defined pattern. This is normally only used internally by 1690 the pyparsing module, but may be needed in some whitespace-sensitive grammars. 1691 1692 :param recursive: If true (the default), also disable whitespace skipping in child elements (if any) 1693 """ 1694 self.skipWhitespace = False 1695 return self 1696 1697 def set_whitespace_chars( 1698 self, chars: Union[Set[str], str], copy_defaults: bool = False 1699 ) -> "ParserElement": 1700 """ 1701 Overrides the default whitespace chars 1702 """ 1703 self.skipWhitespace = True 1704 self.whiteChars = set(chars) 1705 self.copyDefaultWhiteChars = copy_defaults 1706 return self 1707 1708 def parse_with_tabs(self) -> "ParserElement": 1709 """ 1710 Overrides default behavior to expand ``<TAB>`` s to spaces before parsing the input string. 1711 Must be called before ``parse_string`` when the input grammar contains elements that 1712 match ``<TAB>`` characters. 1713 """ 1714 self.keepTabs = True 1715 return self 1716 1717 def ignore(self, other: "ParserElement") -> "ParserElement": 1718 """ 1719 Define expression to be ignored (e.g., comments) while doing pattern 1720 matching; may be called repeatedly, to define multiple comment or other 1721 ignorable patterns. 1722 1723 Example:: 1724 1725 patt = OneOrMore(Word(alphas)) 1726 patt.parse_string('ablaj /* comment */ lskjd') 1727 # -> ['ablaj'] 1728 1729 patt.ignore(c_style_comment) 1730 patt.parse_string('ablaj /* comment */ lskjd') 1731 # -> ['ablaj', 'lskjd'] 1732 """ 1733 import typing 1734 1735 if isinstance(other, str_type): 1736 other = Suppress(other) 1737 1738 if isinstance(other, Suppress): 1739 if other not in self.ignoreExprs: 1740 self.ignoreExprs.append(other) 1741 else: 1742 self.ignoreExprs.append(Suppress(other.copy())) 1743 return self 1744 1745 def set_debug_actions( 1746 self, 1747 start_action: DebugStartAction, 1748 success_action: DebugSuccessAction, 1749 exception_action: DebugExceptionAction, 1750 ) -> "ParserElement": 1751 """ 1752 Customize display of debugging messages while doing pattern matching: 1753 1754 - ``start_action`` - method to be called when an expression is about to be parsed; 1755 should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)`` 1756 1757 - ``success_action`` - method to be called when an expression has successfully parsed; 1758 should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserELement, parsed_tokens: ParseResults, cache_hit: bool)`` 1759 1760 - ``exception_action`` - method to be called when expression fails to parse; 1761 should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)`` 1762 """ 1763 self.debugActions = ( 1764 start_action or _default_start_debug_action, 1765 success_action or _default_success_debug_action, 1766 exception_action or _default_exception_debug_action, 1767 ) 1768 self.debug = True 1769 return self 1770 1771 def set_debug(self, flag=True) -> "ParserElement": 1772 """ 1773 Enable display of debugging messages while doing pattern matching. 1774 Set ``flag`` to ``True`` to enable, ``False`` to disable. 1775 1776 Example:: 1777 1778 wd = Word(alphas).set_name("alphaword") 1779 integer = Word(nums).set_name("numword") 1780 term = wd | integer 1781 1782 # turn on debugging for wd 1783 wd.set_debug() 1784 1785 OneOrMore(term).parse_string("abc 123 xyz 890") 1786 1787 prints:: 1788 1789 Match alphaword at loc 0(1,1) 1790 Matched alphaword -> ['abc'] 1791 Match alphaword at loc 3(1,4) 1792 Exception raised:Expected alphaword (at char 4), (line:1, col:5) 1793 Match alphaword at loc 7(1,8) 1794 Matched alphaword -> ['xyz'] 1795 Match alphaword at loc 11(1,12) 1796 Exception raised:Expected alphaword (at char 12), (line:1, col:13) 1797 Match alphaword at loc 15(1,16) 1798 Exception raised:Expected alphaword (at char 15), (line:1, col:16) 1799 1800 The output shown is that produced by the default debug actions - custom debug actions can be 1801 specified using :class:`set_debug_actions`. Prior to attempting 1802 to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"`` 1803 is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"`` 1804 message is shown. Also note the use of :class:`set_name` to assign a human-readable name to the expression, 1805 which makes debugging and exception messages easier to understand - for instance, the default 1806 name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``. 1807 """ 1808 if flag: 1809 self.set_debug_actions( 1810 _default_start_debug_action, 1811 _default_success_debug_action, 1812 _default_exception_debug_action, 1813 ) 1814 else: 1815 self.debug = False 1816 return self 1817 1818 @property 1819 def default_name(self) -> str: 1820 if self._defaultName is None: 1821 self._defaultName = self._generateDefaultName() 1822 return self._defaultName 1823 1824 @abstractmethod 1825 def _generateDefaultName(self): 1826 """ 1827 Child classes must define this method, which defines how the ``default_name`` is set. 1828 """ 1829 1830 def set_name(self, name: str) -> "ParserElement": 1831 """ 1832 Define name for this expression, makes debugging and exception messages clearer. 1833 Example:: 1834 Word(nums).parse_string("ABC") # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1) 1835 Word(nums).set_name("integer").parse_string("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1) 1836 """ 1837 self.customName = name 1838 self.errmsg = "Expected " + self.name 1839 if __diag__.enable_debug_on_named_expressions: 1840 self.set_debug() 1841 return self 1842 1843 @property 1844 def name(self) -> str: 1845 # This will use a user-defined name if available, but otherwise defaults back to the auto-generated name 1846 return self.customName if self.customName is not None else self.default_name 1847 1848 def __str__(self) -> str: 1849 return self.name 1850 1851 def __repr__(self) -> str: 1852 return str(self) 1853 1854 def streamline(self) -> "ParserElement": 1855 self.streamlined = True 1856 self._defaultName = None 1857 return self 1858 1859 def recurse(self): 1860 return [] 1861 1862 def _checkRecursion(self, parseElementList): 1863 subRecCheckList = parseElementList[:] + [self] 1864 for e in self.recurse(): 1865 e._checkRecursion(subRecCheckList) 1866 1867 def validate(self, validateTrace=None): 1868 """ 1869 Check defined expressions for valid structure, check for infinite recursive definitions. 1870 """ 1871 self._checkRecursion([]) 1872 1873 def parse_file( 1874 self, 1875 file_or_filename: Union[str, Path, TextIO], 1876 encoding: str = "utf-8", 1877 parse_all: bool = False, 1878 *, 1879 parseAll: bool = False, 1880 ) -> ParseResults: 1881 """ 1882 Execute the parse expression on the given file or filename. 1883 If a filename is specified (instead of a file object), 1884 the entire file is opened, read, and closed before parsing. 1885 """ 1886 parseAll = parseAll or parse_all 1887 try: 1888 file_contents = file_or_filename.read() 1889 except AttributeError: 1890 with open(file_or_filename, "r", encoding=encoding) as f: 1891 file_contents = f.read() 1892 try: 1893 return self.parse_string(file_contents, parseAll) 1894 except ParseBaseException as exc: 1895 if ParserElement.verbose_stacktrace: 1896 raise 1897 else: 1898 # catch and re-raise exception from here, clears out pyparsing internal stack trace 1899 raise exc.with_traceback(None) 1900 1901 def __eq__(self, other): 1902 if self is other: 1903 return True 1904 elif isinstance(other, str_type): 1905 return self.matches(other, parse_all=True) 1906 elif isinstance(other, ParserElement): 1907 return vars(self) == vars(other) 1908 return False 1909 1910 def __hash__(self): 1911 return id(self) 1912 1913 def matches( 1914 self, test_string: str, parse_all: bool = True, *, parseAll: bool = True 1915 ) -> bool: 1916 """ 1917 Method for quick testing of a parser against a test string. Good for simple 1918 inline microtests of sub expressions while building up larger parser. 1919 1920 Parameters: 1921 - ``test_string`` - to test against this expression for a match 1922 - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests 1923 1924 Example:: 1925 1926 expr = Word(nums) 1927 assert expr.matches("100") 1928 """ 1929 parseAll = parseAll and parse_all 1930 try: 1931 self.parse_string(str(test_string), parse_all=parseAll) 1932 return True 1933 except ParseBaseException: 1934 return False 1935 1936 def run_tests( 1937 self, 1938 tests: Union[str, List[str]], 1939 parse_all: bool = True, 1940 comment: OptionalType[Union["ParserElement", str]] = "#", 1941 full_dump: bool = True, 1942 print_results: bool = True, 1943 failure_tests: bool = False, 1944 post_parse: Callable[[str, ParseResults], str] = None, 1945 file: OptionalType[TextIO] = None, 1946 with_line_numbers: bool = False, 1947 *, 1948 parseAll: bool = True, 1949 fullDump: bool = True, 1950 printResults: bool = True, 1951 failureTests: bool = False, 1952 postParse: Callable[[str, ParseResults], str] = None, 1953 ): 1954 """ 1955 Execute the parse expression on a series of test strings, showing each 1956 test, the parsed results or where the parse failed. Quick and easy way to 1957 run a parse expression against a list of sample strings. 1958 1959 Parameters: 1960 - ``tests`` - a list of separate test strings, or a multiline string of test strings 1961 - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests 1962 - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test 1963 string; pass None to disable comment filtering 1964 - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline; 1965 if False, only dump nested list 1966 - ``print_results`` - (default= ``True``) prints test output to stdout 1967 - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing 1968 - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as 1969 `fn(test_string, parse_results)` and returns a string to be added to the test output 1970 - ``file`` - (default= ``None``) optional file-like object to which test output will be written; 1971 if None, will default to ``sys.stdout`` 1972 - ``with_line_numbers`` - default= ``False``) show test strings with line and column numbers 1973 1974 Returns: a (success, results) tuple, where success indicates that all tests succeeded 1975 (or failed if ``failure_tests`` is True), and the results contain a list of lines of each 1976 test's output 1977 1978 Example:: 1979 1980 number_expr = pyparsing_common.number.copy() 1981 1982 result = number_expr.run_tests(''' 1983 # unsigned integer 1984 100 1985 # negative integer 1986 -100 1987 # float with scientific notation 1988 6.02e23 1989 # integer with scientific notation 1990 1e-12 1991 ''') 1992 print("Success" if result[0] else "Failed!") 1993 1994 result = number_expr.run_tests(''' 1995 # stray character 1996 100Z 1997 # missing leading digit before '.' 1998 -.100 1999 # too many '.' 2000 3.14.159 2001 ''', failure_tests=True) 2002 print("Success" if result[0] else "Failed!") 2003 2004 prints:: 2005 2006 # unsigned integer 2007 100 2008 [100] 2009 2010 # negative integer 2011 -100 2012 [-100] 2013 2014 # float with scientific notation 2015 6.02e23 2016 [6.02e+23] 2017 2018 # integer with scientific notation 2019 1e-12 2020 [1e-12] 2021 2022 Success 2023 2024 # stray character 2025 100Z 2026 ^ 2027 FAIL: Expected end of text (at char 3), (line:1, col:4) 2028 2029 # missing leading digit before '.' 2030 -.100 2031 ^ 2032 FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1) 2033 2034 # too many '.' 2035 3.14.159 2036 ^ 2037 FAIL: Expected end of text (at char 4), (line:1, col:5) 2038 2039 Success 2040 2041 Each test string must be on a single line. If you want to test a string that spans multiple 2042 lines, create a test like this:: 2043 2044 expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines") 2045 2046 (Note that this is a raw string literal, you must include the leading ``'r'``.) 2047 """ 2048 from .testing import pyparsing_test 2049 2050 parseAll = parseAll and parse_all 2051 fullDump = fullDump and full_dump 2052 printResults = printResults and print_results 2053 failureTests = failureTests or failure_tests 2054 postParse = postParse or post_parse 2055 if isinstance(tests, str_type): 2056 tests = list(map(type(tests).strip, tests.rstrip().splitlines())) 2057 if isinstance(comment, str_type): 2058 comment = Literal(comment) 2059 if file is None: 2060 file = sys.stdout 2061 print_ = file.write 2062 2063 result: Union[ParseResults, Exception] 2064 allResults = [] 2065 comments = [] 2066 success = True 2067 NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string) 2068 BOM = "\ufeff" 2069 for t in tests: 2070 if comment is not None and comment.matches(t, False) or comments and not t: 2071 comments.append(pyparsing_test.with_line_numbers(t)) 2072 continue 2073 if not t: 2074 continue 2075 out = [ 2076 "\n" + "\n".join(comments) if comments else "", 2077 pyparsing_test.with_line_numbers(t) if with_line_numbers else t, 2078 ] 2079 comments = [] 2080 try: 2081 # convert newline marks to actual newlines, and strip leading BOM if present 2082 t = NL.transform_string(t.lstrip(BOM)) 2083 result = self.parse_string(t, parse_all=parseAll) 2084 except ParseBaseException as pe: 2085 fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else "" 2086 out.append(pe.explain()) 2087 out.append("FAIL: " + str(pe)) 2088 if ParserElement.verbose_stacktrace: 2089 out.extend(traceback.format_tb(pe.__traceback__)) 2090 success = success and failureTests 2091 result = pe 2092 except Exception as exc: 2093 out.append("FAIL-EXCEPTION: {}: {}".format(type(exc).__name__, exc)) 2094 if ParserElement.verbose_stacktrace: 2095 out.extend(traceback.format_tb(exc.__traceback__)) 2096 success = success and failureTests 2097 result = exc 2098 else: 2099 success = success and not failureTests 2100 if postParse is not None: 2101 try: 2102 pp_value = postParse(t, result) 2103 if pp_value is not None: 2104 if isinstance(pp_value, ParseResults): 2105 out.append(pp_value.dump()) 2106 else: 2107 out.append(str(pp_value)) 2108 else: 2109 out.append(result.dump()) 2110 except Exception as e: 2111 out.append(result.dump(full=fullDump)) 2112 out.append( 2113 "{} failed: {}: {}".format( 2114 postParse.__name__, type(e).__name__, e 2115 ) 2116 ) 2117 else: 2118 out.append(result.dump(full=fullDump)) 2119 out.append("") 2120 2121 if printResults: 2122 print_("\n".join(out)) 2123 2124 allResults.append((t, result)) 2125 2126 return success, allResults 2127 2128 def create_diagram( 2129 self, 2130 output_html: Union[TextIO, Path, str], 2131 vertical: int = 3, 2132 show_results_names: bool = False, 2133 **kwargs, 2134 ) -> None: 2135 """ 2136 Create a railroad diagram for the parser. 2137 2138 Parameters: 2139 - output_html (str or file-like object) - output target for generated 2140 diagram HTML 2141 - vertical (int) - threshold for formatting multiple alternatives vertically 2142 instead of horizontally (default=3) 2143 - show_results_names - bool flag whether diagram should show annotations for 2144 defined results names 2145 2146 Additional diagram-formatting keyword arguments can also be included; 2147 see railroad.Diagram class. 2148 """ 2149 2150 try: 2151 from .diagram import to_railroad, railroad_to_html 2152 except ImportError as ie: 2153 raise Exception( 2154 "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams" 2155 ) from ie 2156 2157 self.streamline() 2158 2159 railroad = to_railroad( 2160 self, 2161 vertical=vertical, 2162 show_results_names=show_results_names, 2163 diagram_kwargs=kwargs, 2164 ) 2165 if isinstance(output_html, (str, Path)): 2166 with open(output_html, "w", encoding="utf-8") as diag_file: 2167 diag_file.write(railroad_to_html(railroad)) 2168 else: 2169 # we were passed a file-like object, just write to it 2170 output_html.write(railroad_to_html(railroad)) 2171 2172 setDefaultWhitespaceChars = set_default_whitespace_chars 2173 inlineLiteralsUsing = inline_literals_using 2174 setResultsName = set_results_name 2175 setBreak = set_break 2176 setParseAction = set_parse_action 2177 addParseAction = add_parse_action 2178 addCondition = add_condition 2179 setFailAction = set_fail_action 2180 tryParse = try_parse 2181 canParseNext = can_parse_next 2182 resetCache = reset_cache 2183 enableLeftRecursion = enable_left_recursion 2184 enablePackrat = enable_packrat 2185 parseString = parse_string 2186 scanString = scan_string 2187 searchString = search_string 2188 transformString = transform_string 2189 setWhitespaceChars = set_whitespace_chars 2190 parseWithTabs = parse_with_tabs 2191 setDebugActions = set_debug_actions 2192 setDebug = set_debug 2193 defaultName = default_name 2194 setName = set_name 2195 parseFile = parse_file 2196 runTests = run_tests 2197 ignoreWhitespace = ignore_whitespace 2198 leaveWhitespace = leave_whitespace 2199 2200 2201class _PendingSkip(ParserElement): 2202 # internal placeholder class to hold a place were '...' is added to a parser element, 2203 # once another ParserElement is added, this placeholder will be replaced with a SkipTo 2204 def __init__(self, expr: ParserElement, must_skip: bool = False): 2205 super().__init__() 2206 self.anchor = expr 2207 self.must_skip = must_skip 2208 2209 def _generateDefaultName(self): 2210 return str(self.anchor + Empty()).replace("Empty", "...") 2211 2212 def __add__(self, other): 2213 skipper = SkipTo(other).set_name("...")("_skipped*") 2214 if self.must_skip: 2215 2216 def must_skip(t): 2217 if not t._skipped or t._skipped.as_list() == [""]: 2218 del t[0] 2219 t.pop("_skipped", None) 2220 2221 def show_skip(t): 2222 if t._skipped.as_list()[-1:] == [""]: 2223 t.pop("_skipped") 2224 t["_skipped"] = "missing <" + repr(self.anchor) + ">" 2225 2226 return ( 2227 self.anchor + skipper().add_parse_action(must_skip) 2228 | skipper().add_parse_action(show_skip) 2229 ) + other 2230 2231 return self.anchor + skipper + other 2232 2233 def __repr__(self): 2234 return self.defaultName 2235 2236 def parseImpl(self, *args): 2237 raise Exception( 2238 "use of `...` expression without following SkipTo target expression" 2239 ) 2240 2241 2242class Token(ParserElement): 2243 """Abstract :class:`ParserElement` subclass, for defining atomic 2244 matching patterns. 2245 """ 2246 2247 def __init__(self): 2248 super().__init__(savelist=False) 2249 2250 def _generateDefaultName(self): 2251 return type(self).__name__ 2252 2253 2254class Empty(Token): 2255 """ 2256 An empty token, will always match. 2257 """ 2258 2259 def __init__(self): 2260 super().__init__() 2261 self.mayReturnEmpty = True 2262 self.mayIndexError = False 2263 2264 2265class NoMatch(Token): 2266 """ 2267 A token that will never match. 2268 """ 2269 2270 def __init__(self): 2271 super().__init__() 2272 self.mayReturnEmpty = True 2273 self.mayIndexError = False 2274 self.errmsg = "Unmatchable token" 2275 2276 def parseImpl(self, instring, loc, doActions=True): 2277 raise ParseException(instring, loc, self.errmsg, self) 2278 2279 2280class Literal(Token): 2281 """ 2282 Token to exactly match a specified string. 2283 2284 Example:: 2285 2286 Literal('blah').parse_string('blah') # -> ['blah'] 2287 Literal('blah').parse_string('blahfooblah') # -> ['blah'] 2288 Literal('blah').parse_string('bla') # -> Exception: Expected "blah" 2289 2290 For case-insensitive matching, use :class:`CaselessLiteral`. 2291 2292 For keyword matching (force word break before and after the matched string), 2293 use :class:`Keyword` or :class:`CaselessKeyword`. 2294 """ 2295 2296 def __init__(self, match_string: str = "", *, matchString: str = ""): 2297 super().__init__() 2298 match_string = matchString or match_string 2299 self.match = match_string 2300 self.matchLen = len(match_string) 2301 try: 2302 self.firstMatchChar = match_string[0] 2303 except IndexError: 2304 raise ValueError("null string passed to Literal; use Empty() instead") 2305 self.errmsg = "Expected " + self.name 2306 self.mayReturnEmpty = False 2307 self.mayIndexError = False 2308 2309 # Performance tuning: modify __class__ to select 2310 # a parseImpl optimized for single-character check 2311 if self.matchLen == 1 and type(self) is Literal: 2312 self.__class__ = _SingleCharLiteral 2313 2314 def _generateDefaultName(self): 2315 return repr(self.match) 2316 2317 def parseImpl(self, instring, loc, doActions=True): 2318 if instring[loc] == self.firstMatchChar and instring.startswith( 2319 self.match, loc 2320 ): 2321 return loc + self.matchLen, self.match 2322 raise ParseException(instring, loc, self.errmsg, self) 2323 2324 2325class _SingleCharLiteral(Literal): 2326 def parseImpl(self, instring, loc, doActions=True): 2327 if instring[loc] == self.firstMatchChar: 2328 return loc + 1, self.match 2329 raise ParseException(instring, loc, self.errmsg, self) 2330 2331 2332ParserElement._literalStringClass = Literal 2333 2334 2335class Keyword(Token): 2336 """ 2337 Token to exactly match a specified string as a keyword, that is, 2338 it must be immediately followed by a non-keyword character. Compare 2339 with :class:`Literal`: 2340 2341 - ``Literal("if")`` will match the leading ``'if'`` in 2342 ``'ifAndOnlyIf'``. 2343 - ``Keyword("if")`` will not; it will only match the leading 2344 ``'if'`` in ``'if x=1'``, or ``'if(y==2)'`` 2345 2346 Accepts two optional constructor arguments in addition to the 2347 keyword string: 2348 2349 - ``identChars`` is a string of characters that would be valid 2350 identifier characters, defaulting to all alphanumerics + "_" and 2351 "$" 2352 - ``caseless`` allows case-insensitive matching, default is ``False``. 2353 2354 Example:: 2355 2356 Keyword("start").parse_string("start") # -> ['start'] 2357 Keyword("start").parse_string("starting") # -> Exception 2358 2359 For case-insensitive matching, use :class:`CaselessKeyword`. 2360 """ 2361 2362 DEFAULT_KEYWORD_CHARS = alphanums + "_$" 2363 2364 def __init__( 2365 self, 2366 match_string: str = "", 2367 ident_chars: OptionalType[str] = None, 2368 caseless: bool = False, 2369 *, 2370 matchString: str = "", 2371 identChars: OptionalType[str] = None, 2372 ): 2373 super().__init__() 2374 identChars = identChars or ident_chars 2375 if identChars is None: 2376 identChars = Keyword.DEFAULT_KEYWORD_CHARS 2377 match_string = matchString or match_string 2378 self.match = match_string 2379 self.matchLen = len(match_string) 2380 try: 2381 self.firstMatchChar = match_string[0] 2382 except IndexError: 2383 raise ValueError("null string passed to Keyword; use Empty() instead") 2384 self.errmsg = "Expected {} {}".format(type(self).__name__, self.name) 2385 self.mayReturnEmpty = False 2386 self.mayIndexError = False 2387 self.caseless = caseless 2388 if caseless: 2389 self.caselessmatch = match_string.upper() 2390 identChars = identChars.upper() 2391 self.identChars = set(identChars) 2392 2393 def _generateDefaultName(self): 2394 return repr(self.match) 2395 2396 def parseImpl(self, instring, loc, doActions=True): 2397 errmsg = self.errmsg 2398 errloc = loc 2399 if self.caseless: 2400 if instring[loc : loc + self.matchLen].upper() == self.caselessmatch: 2401 if loc == 0 or instring[loc - 1].upper() not in self.identChars: 2402 if ( 2403 loc >= len(instring) - self.matchLen 2404 or instring[loc + self.matchLen].upper() not in self.identChars 2405 ): 2406 return loc + self.matchLen, self.match 2407 else: 2408 # followed by keyword char 2409 errmsg += ", was immediately followed by keyword character" 2410 errloc = loc + self.matchLen 2411 else: 2412 # preceded by keyword char 2413 errmsg += ", keyword was immediately preceded by keyword character" 2414 errloc = loc - 1 2415 # else no match just raise plain exception 2416 2417 else: 2418 if ( 2419 instring[loc] == self.firstMatchChar 2420 and self.matchLen == 1 2421 or instring.startswith(self.match, loc) 2422 ): 2423 if loc == 0 or instring[loc - 1] not in self.identChars: 2424 if ( 2425 loc >= len(instring) - self.matchLen 2426 or instring[loc + self.matchLen] not in self.identChars 2427 ): 2428 return loc + self.matchLen, self.match 2429 else: 2430 # followed by keyword char 2431 errmsg += ( 2432 ", keyword was immediately followed by keyword character" 2433 ) 2434 errloc = loc + self.matchLen 2435 else: 2436 # preceded by keyword char 2437 errmsg += ", keyword was immediately preceded by keyword character" 2438 errloc = loc - 1 2439 # else no match just raise plain exception 2440 2441 raise ParseException(instring, errloc, errmsg, self) 2442 2443 @staticmethod 2444 def set_default_keyword_chars(chars): 2445 """ 2446 Overrides the default characters used by :class:`Keyword` expressions. 2447 """ 2448 Keyword.DEFAULT_KEYWORD_CHARS = chars 2449 2450 setDefaultKeywordChars = set_default_keyword_chars 2451 2452 2453class CaselessLiteral(Literal): 2454 """ 2455 Token to match a specified string, ignoring case of letters. 2456 Note: the matched results will always be in the case of the given 2457 match string, NOT the case of the input text. 2458 2459 Example:: 2460 2461 OneOrMore(CaselessLiteral("CMD")).parse_string("cmd CMD Cmd10") 2462 # -> ['CMD', 'CMD', 'CMD'] 2463 2464 (Contrast with example for :class:`CaselessKeyword`.) 2465 """ 2466 2467 def __init__(self, match_string: str = "", *, matchString: str = ""): 2468 match_string = matchString or match_string 2469 super().__init__(match_string.upper()) 2470 # Preserve the defining literal. 2471 self.returnString = match_string 2472 self.errmsg = "Expected " + self.name 2473 2474 def parseImpl(self, instring, loc, doActions=True): 2475 if instring[loc : loc + self.matchLen].upper() == self.match: 2476 return loc + self.matchLen, self.returnString 2477 raise ParseException(instring, loc, self.errmsg, self) 2478 2479 2480class CaselessKeyword(Keyword): 2481 """ 2482 Caseless version of :class:`Keyword`. 2483 2484 Example:: 2485 2486 OneOrMore(CaselessKeyword("CMD")).parse_string("cmd CMD Cmd10") 2487 # -> ['CMD', 'CMD'] 2488 2489 (Contrast with example for :class:`CaselessLiteral`.) 2490 """ 2491 2492 def __init__( 2493 self, 2494 match_string: str = "", 2495 ident_chars: OptionalType[str] = None, 2496 *, 2497 matchString: str = "", 2498 identChars: OptionalType[str] = None, 2499 ): 2500 identChars = identChars or ident_chars 2501 match_string = matchString or match_string 2502 super().__init__(match_string, identChars, caseless=True) 2503 2504 2505class CloseMatch(Token): 2506 """A variation on :class:`Literal` which matches "close" matches, 2507 that is, strings with at most 'n' mismatching characters. 2508 :class:`CloseMatch` takes parameters: 2509 2510 - ``match_string`` - string to be matched 2511 - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters 2512 - ``max_mismatches`` - (``default=1``) maximum number of 2513 mismatches allowed to count as a match 2514 2515 The results from a successful parse will contain the matched text 2516 from the input string and the following named results: 2517 2518 - ``mismatches`` - a list of the positions within the 2519 match_string where mismatches were found 2520 - ``original`` - the original match_string used to compare 2521 against the input string 2522 2523 If ``mismatches`` is an empty list, then the match was an exact 2524 match. 2525 2526 Example:: 2527 2528 patt = CloseMatch("ATCATCGAATGGA") 2529 patt.parse_string("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']}) 2530 patt.parse_string("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1) 2531 2532 # exact match 2533 patt.parse_string("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']}) 2534 2535 # close match allowing up to 2 mismatches 2536 patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2) 2537 patt.parse_string("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']}) 2538 """ 2539 2540 def __init__( 2541 self, 2542 match_string: str, 2543 max_mismatches: int = None, 2544 *, 2545 maxMismatches: int = 1, 2546 caseless=False, 2547 ): 2548 maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches 2549 super().__init__() 2550 self.match_string = match_string 2551 self.maxMismatches = maxMismatches 2552 self.errmsg = "Expected {!r} (with up to {} mismatches)".format( 2553 self.match_string, self.maxMismatches 2554 ) 2555 self.caseless = caseless 2556 self.mayIndexError = False 2557 self.mayReturnEmpty = False 2558 2559 def _generateDefaultName(self): 2560 return "{}:{!r}".format(type(self).__name__, self.match_string) 2561 2562 def parseImpl(self, instring, loc, doActions=True): 2563 start = loc 2564 instrlen = len(instring) 2565 maxloc = start + len(self.match_string) 2566 2567 if maxloc <= instrlen: 2568 match_string = self.match_string 2569 match_stringloc = 0 2570 mismatches = [] 2571 maxMismatches = self.maxMismatches 2572 2573 for match_stringloc, s_m in enumerate( 2574 zip(instring[loc:maxloc], match_string) 2575 ): 2576 src, mat = s_m 2577 if self.caseless: 2578 src, mat = src.lower(), mat.lower() 2579 2580 if src != mat: 2581 mismatches.append(match_stringloc) 2582 if len(mismatches) > maxMismatches: 2583 break 2584 else: 2585 loc = start + match_stringloc + 1 2586 results = ParseResults([instring[start:loc]]) 2587 results["original"] = match_string 2588 results["mismatches"] = mismatches 2589 return loc, results 2590 2591 raise ParseException(instring, loc, self.errmsg, self) 2592 2593 2594class Word(Token): 2595 """Token for matching words composed of allowed character sets. 2596 Parameters: 2597 - ``init_chars`` - string of all characters that should be used to 2598 match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.; 2599 if ``body_chars`` is also specified, then this is the string of 2600 initial characters 2601 - ``body_chars`` - string of characters that 2602 can be used for matching after a matched initial character as 2603 given in ``init_chars``; if omitted, same as the initial characters 2604 (default=``None``) 2605 - ``min`` - minimum number of characters to match (default=1) 2606 - ``max`` - maximum number of characters to match (default=0) 2607 - ``exact`` - exact number of characters to match (default=0) 2608 - ``as_keyword`` - match as a keyword (default=``False``) 2609 - ``exclude_chars`` - characters that might be 2610 found in the input ``body_chars`` string but which should not be 2611 accepted for matching ;useful to define a word of all 2612 printables except for one or two characters, for instance 2613 (default=``None``) 2614 2615 :class:`srange` is useful for defining custom character set strings 2616 for defining :class:`Word` expressions, using range notation from 2617 regular expression character sets. 2618 2619 A common mistake is to use :class:`Word` to match a specific literal 2620 string, as in ``Word("Address")``. Remember that :class:`Word` 2621 uses the string argument to define *sets* of matchable characters. 2622 This expression would match "Add", "AAA", "dAred", or any other word 2623 made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an 2624 exact literal string, use :class:`Literal` or :class:`Keyword`. 2625 2626 pyparsing includes helper strings for building Words: 2627 2628 - :class:`alphas` 2629 - :class:`nums` 2630 - :class:`alphanums` 2631 - :class:`hexnums` 2632 - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255 2633 - accented, tilded, umlauted, etc.) 2634 - :class:`punc8bit` (non-alphabetic characters in ASCII range 2635 128-255 - currency, symbols, superscripts, diacriticals, etc.) 2636 - :class:`printables` (any non-whitespace character) 2637 2638 ``alphas``, ``nums``, and ``printables`` are also defined in several 2639 Unicode sets - see :class:`pyparsing_unicode``. 2640 2641 Example:: 2642 2643 # a word composed of digits 2644 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9")) 2645 2646 # a word with a leading capital, and zero or more lowercase 2647 capital_word = Word(alphas.upper(), alphas.lower()) 2648 2649 # hostnames are alphanumeric, with leading alpha, and '-' 2650 hostname = Word(alphas, alphanums + '-') 2651 2652 # roman numeral (not a strict parser, accepts invalid mix of characters) 2653 roman = Word("IVXLCDM") 2654 2655 # any string of non-whitespace characters, except for ',' 2656 csv_value = Word(printables, exclude_chars=",") 2657 """ 2658 2659 def __init__( 2660 self, 2661 init_chars: str = "", 2662 body_chars: OptionalType[str] = None, 2663 min: int = 1, 2664 max: int = 0, 2665 exact: int = 0, 2666 as_keyword: bool = False, 2667 exclude_chars: OptionalType[str] = None, 2668 *, 2669 initChars: OptionalType[str] = None, 2670 bodyChars: OptionalType[str] = None, 2671 asKeyword: bool = False, 2672 excludeChars: OptionalType[str] = None, 2673 ): 2674 initChars = initChars or init_chars 2675 bodyChars = bodyChars or body_chars 2676 asKeyword = asKeyword or as_keyword 2677 excludeChars = excludeChars or exclude_chars 2678 super().__init__() 2679 if not initChars: 2680 raise ValueError( 2681 "invalid {}, initChars cannot be empty string".format( 2682 type(self).__name__ 2683 ) 2684 ) 2685 2686 initChars = set(initChars) 2687 self.initChars = initChars 2688 if excludeChars: 2689 excludeChars = set(excludeChars) 2690 initChars -= excludeChars 2691 if bodyChars: 2692 bodyChars = set(bodyChars) - excludeChars 2693 self.initCharsOrig = "".join(sorted(initChars)) 2694 2695 if bodyChars: 2696 self.bodyCharsOrig = "".join(sorted(bodyChars)) 2697 self.bodyChars = set(bodyChars) 2698 else: 2699 self.bodyCharsOrig = "".join(sorted(initChars)) 2700 self.bodyChars = set(initChars) 2701 2702 self.maxSpecified = max > 0 2703 2704 if min < 1: 2705 raise ValueError( 2706 "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted" 2707 ) 2708 2709 self.minLen = min 2710 2711 if max > 0: 2712 self.maxLen = max 2713 else: 2714 self.maxLen = _MAX_INT 2715 2716 if exact > 0: 2717 self.maxLen = exact 2718 self.minLen = exact 2719 2720 self.errmsg = "Expected " + self.name 2721 self.mayIndexError = False 2722 self.asKeyword = asKeyword 2723 2724 # see if we can make a regex for this Word 2725 if " " not in self.initChars | self.bodyChars and (min == 1 and exact == 0): 2726 if self.bodyChars == self.initChars: 2727 if max == 0: 2728 repeat = "+" 2729 elif max == 1: 2730 repeat = "" 2731 else: 2732 repeat = "{{{},{}}}".format( 2733 self.minLen, "" if self.maxLen == _MAX_INT else self.maxLen 2734 ) 2735 self.reString = "[{}]{}".format( 2736 _collapse_string_to_ranges(self.initChars), 2737 repeat, 2738 ) 2739 elif len(self.initChars) == 1: 2740 if max == 0: 2741 repeat = "*" 2742 else: 2743 repeat = "{{0,{}}}".format(max - 1) 2744 self.reString = "{}[{}]{}".format( 2745 re.escape(self.initCharsOrig), 2746 _collapse_string_to_ranges(self.bodyChars), 2747 repeat, 2748 ) 2749 else: 2750 if max == 0: 2751 repeat = "*" 2752 elif max == 2: 2753 repeat = "" 2754 else: 2755 repeat = "{{0,{}}}".format(max - 1) 2756 self.reString = "[{}][{}]{}".format( 2757 _collapse_string_to_ranges(self.initChars), 2758 _collapse_string_to_ranges(self.bodyChars), 2759 repeat, 2760 ) 2761 if self.asKeyword: 2762 self.reString = r"\b" + self.reString + r"\b" 2763 2764 try: 2765 self.re = re.compile(self.reString) 2766 except sre_constants.error: 2767 self.re = None 2768 else: 2769 self.re_match = self.re.match 2770 self.__class__ = _WordRegex 2771 2772 def _generateDefaultName(self): 2773 def charsAsStr(s): 2774 max_repr_len = 16 2775 s = _collapse_string_to_ranges(s, re_escape=False) 2776 if len(s) > max_repr_len: 2777 return s[: max_repr_len - 3] + "..." 2778 else: 2779 return s 2780 2781 if self.initChars != self.bodyChars: 2782 base = "W:({}, {})".format( 2783 charsAsStr(self.initChars), charsAsStr(self.bodyChars) 2784 ) 2785 else: 2786 base = "W:({})".format(charsAsStr(self.initChars)) 2787 2788 # add length specification 2789 if self.minLen > 1 or self.maxLen != _MAX_INT: 2790 if self.minLen == self.maxLen: 2791 if self.minLen == 1: 2792 return base[2:] 2793 else: 2794 return base + "{{{}}}".format(self.minLen) 2795 elif self.maxLen == _MAX_INT: 2796 return base + "{{{},...}}".format(self.minLen) 2797 else: 2798 return base + "{{{},{}}}".format(self.minLen, self.maxLen) 2799 return base 2800 2801 def parseImpl(self, instring, loc, doActions=True): 2802 if instring[loc] not in self.initChars: 2803 raise ParseException(instring, loc, self.errmsg, self) 2804 2805 start = loc 2806 loc += 1 2807 instrlen = len(instring) 2808 bodychars = self.bodyChars 2809 maxloc = start + self.maxLen 2810 maxloc = min(maxloc, instrlen) 2811 while loc < maxloc and instring[loc] in bodychars: 2812 loc += 1 2813 2814 throwException = False 2815 if loc - start < self.minLen: 2816 throwException = True 2817 elif self.maxSpecified and loc < instrlen and instring[loc] in bodychars: 2818 throwException = True 2819 elif self.asKeyword: 2820 if ( 2821 start > 0 2822 and instring[start - 1] in bodychars 2823 or loc < instrlen 2824 and instring[loc] in bodychars 2825 ): 2826 throwException = True 2827 2828 if throwException: 2829 raise ParseException(instring, loc, self.errmsg, self) 2830 2831 return loc, instring[start:loc] 2832 2833 2834class _WordRegex(Word): 2835 def parseImpl(self, instring, loc, doActions=True): 2836 result = self.re_match(instring, loc) 2837 if not result: 2838 raise ParseException(instring, loc, self.errmsg, self) 2839 2840 loc = result.end() 2841 return loc, result.group() 2842 2843 2844class Char(_WordRegex): 2845 """A short-cut class for defining :class:`Word` ``(characters, exact=1)``, 2846 when defining a match of any single character in a string of 2847 characters. 2848 """ 2849 2850 def __init__( 2851 self, 2852 charset: str, 2853 as_keyword: bool = False, 2854 exclude_chars: OptionalType[str] = None, 2855 *, 2856 asKeyword: bool = False, 2857 excludeChars: OptionalType[str] = None, 2858 ): 2859 asKeyword = asKeyword or as_keyword 2860 excludeChars = excludeChars or exclude_chars 2861 super().__init__( 2862 charset, exact=1, asKeyword=asKeyword, excludeChars=excludeChars 2863 ) 2864 self.reString = "[{}]".format(_collapse_string_to_ranges(self.initChars)) 2865 if asKeyword: 2866 self.reString = r"\b{}\b".format(self.reString) 2867 self.re = re.compile(self.reString) 2868 self.re_match = self.re.match 2869 2870 2871class Regex(Token): 2872 r"""Token for matching strings that match a given regular 2873 expression. Defined with string specifying the regular expression in 2874 a form recognized by the stdlib Python `re module <https://docs.python.org/3/library/re.html>`_. 2875 If the given regex contains named groups (defined using ``(?P<name>...)``), 2876 these will be preserved as named :class:`ParseResults`. 2877 2878 If instead of the Python stdlib ``re`` module you wish to use a different RE module 2879 (such as the ``regex`` module), you can do so by building your ``Regex`` object with 2880 a compiled RE that was compiled using ``regex``. 2881 2882 Example:: 2883 2884 realnum = Regex(r"[+-]?\d+\.\d*") 2885 # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression 2886 roman = Regex(r"M{0,4}(CM|CD|D?{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})") 2887 2888 # named fields in a regex will be returned as named results 2889 date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)') 2890 2891 # the Regex class will accept re's compiled using the regex module 2892 import regex 2893 parser = pp.Regex(regex.compile(r'[0-9]')) 2894 """ 2895 2896 def __init__( 2897 self, 2898 pattern: Any, 2899 flags: Union[re.RegexFlag, int] = 0, 2900 as_group_list: bool = False, 2901 as_match: bool = False, 2902 *, 2903 asGroupList: bool = False, 2904 asMatch: bool = False, 2905 ): 2906 """The parameters ``pattern`` and ``flags`` are passed 2907 to the ``re.compile()`` function as-is. See the Python 2908 `re module <https://docs.python.org/3/library/re.html>`_ module for an 2909 explanation of the acceptable patterns and flags. 2910 """ 2911 super().__init__() 2912 asGroupList = asGroupList or as_group_list 2913 asMatch = asMatch or as_match 2914 2915 if isinstance(pattern, str_type): 2916 if not pattern: 2917 raise ValueError("null string passed to Regex; use Empty() instead") 2918 2919 self.pattern = pattern 2920 self.flags = flags 2921 2922 try: 2923 self.re = re.compile(self.pattern, self.flags) 2924 self.reString = self.pattern 2925 except sre_constants.error: 2926 raise ValueError( 2927 "invalid pattern ({!r}) passed to Regex".format(pattern) 2928 ) 2929 2930 elif hasattr(pattern, "pattern") and hasattr(pattern, "match"): 2931 self.re = pattern 2932 self.pattern = self.reString = pattern.pattern 2933 self.flags = flags 2934 2935 else: 2936 raise TypeError( 2937 "Regex may only be constructed with a string or a compiled RE object" 2938 ) 2939 2940 self.re_match = self.re.match 2941 2942 self.errmsg = "Expected " + self.name 2943 self.mayIndexError = False 2944 self.mayReturnEmpty = self.re_match("") is not None 2945 self.asGroupList = asGroupList 2946 self.asMatch = asMatch 2947 if self.asGroupList: 2948 self.parseImpl = self.parseImplAsGroupList 2949 if self.asMatch: 2950 self.parseImpl = self.parseImplAsMatch 2951 2952 def _generateDefaultName(self): 2953 return "Re:({})".format(repr(self.pattern).replace("\\\\", "\\")) 2954 2955 def parseImpl(self, instring, loc, doActions=True): 2956 result = self.re_match(instring, loc) 2957 if not result: 2958 raise ParseException(instring, loc, self.errmsg, self) 2959 2960 loc = result.end() 2961 ret = ParseResults(result.group()) 2962 d = result.groupdict() 2963 if d: 2964 for k, v in d.items(): 2965 ret[k] = v 2966 return loc, ret 2967 2968 def parseImplAsGroupList(self, instring, loc, doActions=True): 2969 result = self.re_match(instring, loc) 2970 if not result: 2971 raise ParseException(instring, loc, self.errmsg, self) 2972 2973 loc = result.end() 2974 ret = result.groups() 2975 return loc, ret 2976 2977 def parseImplAsMatch(self, instring, loc, doActions=True): 2978 result = self.re_match(instring, loc) 2979 if not result: 2980 raise ParseException(instring, loc, self.errmsg, self) 2981 2982 loc = result.end() 2983 ret = result 2984 return loc, ret 2985 2986 def sub(self, repl): 2987 r""" 2988 Return :class:`Regex` with an attached parse action to transform the parsed 2989 result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_. 2990 2991 Example:: 2992 2993 make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>") 2994 print(make_html.transform_string("h1:main title:")) 2995 # prints "<h1>main title</h1>" 2996 """ 2997 if self.asGroupList: 2998 raise TypeError("cannot use sub() with Regex(asGroupList=True)") 2999 3000 if self.asMatch and callable(repl): 3001 raise TypeError("cannot use sub() with a callable with Regex(asMatch=True)") 3002 3003 if self.asMatch: 3004 3005 def pa(tokens): 3006 return tokens[0].expand(repl) 3007 3008 else: 3009 3010 def pa(tokens): 3011 return self.re.sub(repl, tokens[0]) 3012 3013 return self.add_parse_action(pa) 3014 3015 3016class QuotedString(Token): 3017 r""" 3018 Token for matching strings that are delimited by quoting characters. 3019 3020 Defined with the following parameters: 3021 3022 - ``quote_char`` - string of one or more characters defining the 3023 quote delimiting string 3024 - ``esc_char`` - character to re_escape quotes, typically backslash 3025 (default= ``None``) 3026 - ``esc_quote`` - special quote sequence to re_escape an embedded quote 3027 string (such as SQL's ``""`` to re_escape an embedded ``"``) 3028 (default= ``None``) 3029 - ``multiline`` - boolean indicating whether quotes can span 3030 multiple lines (default= ``False``) 3031 - ``unquote_results`` - boolean indicating whether the matched text 3032 should be unquoted (default= ``True``) 3033 - ``end_quote_char`` - string of one or more characters defining the 3034 end of the quote delimited string (default= ``None`` => same as 3035 quote_char) 3036 - ``convert_whitespace_escapes`` - convert escaped whitespace 3037 (``'\t'``, ``'\n'``, etc.) to actual whitespace 3038 (default= ``True``) 3039 3040 Example:: 3041 3042 qs = QuotedString('"') 3043 print(qs.search_string('lsjdf "This is the quote" sldjf')) 3044 complex_qs = QuotedString('{{', end_quote_char='}}') 3045 print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf')) 3046 sql_qs = QuotedString('"', esc_quote='""') 3047 print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf')) 3048 3049 prints:: 3050 3051 [['This is the quote']] 3052 [['This is the "quote"']] 3053 [['This is the quote with "embedded" quotes']] 3054 """ 3055 ws_map = ((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r")) 3056 3057 def __init__( 3058 self, 3059 quote_char: str = "", 3060 esc_char: OptionalType[str] = None, 3061 esc_quote: OptionalType[str] = None, 3062 multiline: bool = False, 3063 unquote_results: bool = True, 3064 end_quote_char: OptionalType[str] = None, 3065 convert_whitespace_escapes: bool = True, 3066 *, 3067 quoteChar: str = "", 3068 escChar: OptionalType[str] = None, 3069 escQuote: OptionalType[str] = None, 3070 unquoteResults: bool = True, 3071 endQuoteChar: OptionalType[str] = None, 3072 convertWhitespaceEscapes: bool = True, 3073 ): 3074 super().__init__() 3075 escChar = escChar or esc_char 3076 escQuote = escQuote or esc_quote 3077 unquoteResults = unquoteResults and unquote_results 3078 endQuoteChar = endQuoteChar or end_quote_char 3079 convertWhitespaceEscapes = ( 3080 convertWhitespaceEscapes and convert_whitespace_escapes 3081 ) 3082 quote_char = quoteChar or quote_char 3083 3084 # remove white space from quote chars - wont work anyway 3085 quote_char = quote_char.strip() 3086 if not quote_char: 3087 raise ValueError("quote_char cannot be the empty string") 3088 3089 if endQuoteChar is None: 3090 endQuoteChar = quote_char 3091 else: 3092 endQuoteChar = endQuoteChar.strip() 3093 if not endQuoteChar: 3094 raise ValueError("endQuoteChar cannot be the empty string") 3095 3096 self.quoteChar = quote_char 3097 self.quoteCharLen = len(quote_char) 3098 self.firstQuoteChar = quote_char[0] 3099 self.endQuoteChar = endQuoteChar 3100 self.endQuoteCharLen = len(endQuoteChar) 3101 self.escChar = escChar 3102 self.escQuote = escQuote 3103 self.unquoteResults = unquoteResults 3104 self.convertWhitespaceEscapes = convertWhitespaceEscapes 3105 3106 sep = "" 3107 inner_pattern = "" 3108 3109 if escQuote: 3110 inner_pattern += r"{}(?:{})".format(sep, re.escape(escQuote)) 3111 sep = "|" 3112 3113 if escChar: 3114 inner_pattern += r"{}(?:{}.)".format(sep, re.escape(escChar)) 3115 sep = "|" 3116 self.escCharReplacePattern = re.escape(self.escChar) + "(.)" 3117 3118 if len(self.endQuoteChar) > 1: 3119 inner_pattern += ( 3120 "{}(?:".format(sep) 3121 + "|".join( 3122 "(?:{}(?!{}))".format( 3123 re.escape(self.endQuoteChar[:i]), 3124 _escape_regex_range_chars(self.endQuoteChar[i:]), 3125 ) 3126 for i in range(len(self.endQuoteChar) - 1, 0, -1) 3127 ) 3128 + ")" 3129 ) 3130 sep = "|" 3131 3132 if multiline: 3133 self.flags = re.MULTILINE | re.DOTALL 3134 inner_pattern += r"{}(?:[^{}{}])".format( 3135 sep, 3136 _escape_regex_range_chars(self.endQuoteChar[0]), 3137 (_escape_regex_range_chars(escChar) if escChar is not None else ""), 3138 ) 3139 else: 3140 self.flags = 0 3141 inner_pattern += r"{}(?:[^{}\n\r{}])".format( 3142 sep, 3143 _escape_regex_range_chars(self.endQuoteChar[0]), 3144 (_escape_regex_range_chars(escChar) if escChar is not None else ""), 3145 ) 3146 3147 self.pattern = "".join( 3148 [ 3149 re.escape(self.quoteChar), 3150 "(?:", 3151 inner_pattern, 3152 ")*", 3153 re.escape(self.endQuoteChar), 3154 ] 3155 ) 3156 3157 try: 3158 self.re = re.compile(self.pattern, self.flags) 3159 self.reString = self.pattern 3160 self.re_match = self.re.match 3161 except sre_constants.error: 3162 raise ValueError( 3163 "invalid pattern {!r} passed to Regex".format(self.pattern) 3164 ) 3165 3166 self.errmsg = "Expected " + self.name 3167 self.mayIndexError = False 3168 self.mayReturnEmpty = True 3169 3170 def _generateDefaultName(self): 3171 if self.quoteChar == self.endQuoteChar and isinstance(self.quoteChar, str_type): 3172 return "string enclosed in {!r}".format(self.quoteChar) 3173 3174 return "quoted string, starting with {} ending with {}".format( 3175 self.quoteChar, self.endQuoteChar 3176 ) 3177 3178 def parseImpl(self, instring, loc, doActions=True): 3179 result = ( 3180 instring[loc] == self.firstQuoteChar 3181 and self.re_match(instring, loc) 3182 or None 3183 ) 3184 if not result: 3185 raise ParseException(instring, loc, self.errmsg, self) 3186 3187 loc = result.end() 3188 ret = result.group() 3189 3190 if self.unquoteResults: 3191 3192 # strip off quotes 3193 ret = ret[self.quoteCharLen : -self.endQuoteCharLen] 3194 3195 if isinstance(ret, str_type): 3196 # replace escaped whitespace 3197 if "\\" in ret and self.convertWhitespaceEscapes: 3198 for wslit, wschar in self.ws_map: 3199 ret = ret.replace(wslit, wschar) 3200 3201 # replace escaped characters 3202 if self.escChar: 3203 ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret) 3204 3205 # replace escaped quotes 3206 if self.escQuote: 3207 ret = ret.replace(self.escQuote, self.endQuoteChar) 3208 3209 return loc, ret 3210 3211 3212class CharsNotIn(Token): 3213 """Token for matching words composed of characters *not* in a given 3214 set (will include whitespace in matched characters if not listed in 3215 the provided exclusion set - see example). Defined with string 3216 containing all disallowed characters, and an optional minimum, 3217 maximum, and/or exact length. The default value for ``min`` is 3218 1 (a minimum value < 1 is not valid); the default values for 3219 ``max`` and ``exact`` are 0, meaning no maximum or exact 3220 length restriction. 3221 3222 Example:: 3223 3224 # define a comma-separated-value as anything that is not a ',' 3225 csv_value = CharsNotIn(',') 3226 print(delimited_list(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213")) 3227 3228 prints:: 3229 3230 ['dkls', 'lsdkjf', 's12 34', '@!#', '213'] 3231 """ 3232 3233 def __init__( 3234 self, 3235 not_chars: str = "", 3236 min: int = 1, 3237 max: int = 0, 3238 exact: int = 0, 3239 *, 3240 notChars: str = "", 3241 ): 3242 super().__init__() 3243 self.skipWhitespace = False 3244 self.notChars = not_chars or notChars 3245 self.notCharsSet = set(self.notChars) 3246 3247 if min < 1: 3248 raise ValueError( 3249 "cannot specify a minimum length < 1; use " 3250 "Opt(CharsNotIn()) if zero-length char group is permitted" 3251 ) 3252 3253 self.minLen = min 3254 3255 if max > 0: 3256 self.maxLen = max 3257 else: 3258 self.maxLen = _MAX_INT 3259 3260 if exact > 0: 3261 self.maxLen = exact 3262 self.minLen = exact 3263 3264 self.errmsg = "Expected " + self.name 3265 self.mayReturnEmpty = self.minLen == 0 3266 self.mayIndexError = False 3267 3268 def _generateDefaultName(self): 3269 not_chars_str = _collapse_string_to_ranges(self.notChars) 3270 if len(not_chars_str) > 16: 3271 return "!W:({}...)".format(self.notChars[: 16 - 3]) 3272 else: 3273 return "!W:({})".format(self.notChars) 3274 3275 def parseImpl(self, instring, loc, doActions=True): 3276 notchars = self.notCharsSet 3277 if instring[loc] in notchars: 3278 raise ParseException(instring, loc, self.errmsg, self) 3279 3280 start = loc 3281 loc += 1 3282 maxlen = min(start + self.maxLen, len(instring)) 3283 while loc < maxlen and instring[loc] not in notchars: 3284 loc += 1 3285 3286 if loc - start < self.minLen: 3287 raise ParseException(instring, loc, self.errmsg, self) 3288 3289 return loc, instring[start:loc] 3290 3291 3292class White(Token): 3293 """Special matching class for matching whitespace. Normally, 3294 whitespace is ignored by pyparsing grammars. This class is included 3295 when some whitespace structures are significant. Define with 3296 a string containing the whitespace characters to be matched; default 3297 is ``" \\t\\r\\n"``. Also takes optional ``min``, 3298 ``max``, and ``exact`` arguments, as defined for the 3299 :class:`Word` class. 3300 """ 3301 3302 whiteStrs = { 3303 " ": "<SP>", 3304 "\t": "<TAB>", 3305 "\n": "<LF>", 3306 "\r": "<CR>", 3307 "\f": "<FF>", 3308 "\u00A0": "<NBSP>", 3309 "\u1680": "<OGHAM_SPACE_MARK>", 3310 "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>", 3311 "\u2000": "<EN_QUAD>", 3312 "\u2001": "<EM_QUAD>", 3313 "\u2002": "<EN_SPACE>", 3314 "\u2003": "<EM_SPACE>", 3315 "\u2004": "<THREE-PER-EM_SPACE>", 3316 "\u2005": "<FOUR-PER-EM_SPACE>", 3317 "\u2006": "<SIX-PER-EM_SPACE>", 3318 "\u2007": "<FIGURE_SPACE>", 3319 "\u2008": "<PUNCTUATION_SPACE>", 3320 "\u2009": "<THIN_SPACE>", 3321 "\u200A": "<HAIR_SPACE>", 3322 "\u200B": "<ZERO_WIDTH_SPACE>", 3323 "\u202F": "<NNBSP>", 3324 "\u205F": "<MMSP>", 3325 "\u3000": "<IDEOGRAPHIC_SPACE>", 3326 } 3327 3328 def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0): 3329 super().__init__() 3330 self.matchWhite = ws 3331 self.set_whitespace_chars( 3332 "".join(c for c in self.whiteChars if c not in self.matchWhite), 3333 copy_defaults=True, 3334 ) 3335 # self.leave_whitespace() 3336 self.mayReturnEmpty = True 3337 self.errmsg = "Expected " + self.name 3338 3339 self.minLen = min 3340 3341 if max > 0: 3342 self.maxLen = max 3343 else: 3344 self.maxLen = _MAX_INT 3345 3346 if exact > 0: 3347 self.maxLen = exact 3348 self.minLen = exact 3349 3350 def _generateDefaultName(self): 3351 return "".join(White.whiteStrs[c] for c in self.matchWhite) 3352 3353 def parseImpl(self, instring, loc, doActions=True): 3354 if instring[loc] not in self.matchWhite: 3355 raise ParseException(instring, loc, self.errmsg, self) 3356 start = loc 3357 loc += 1 3358 maxloc = start + self.maxLen 3359 maxloc = min(maxloc, len(instring)) 3360 while loc < maxloc and instring[loc] in self.matchWhite: 3361 loc += 1 3362 3363 if loc - start < self.minLen: 3364 raise ParseException(instring, loc, self.errmsg, self) 3365 3366 return loc, instring[start:loc] 3367 3368 3369class PositionToken(Token): 3370 def __init__(self): 3371 super().__init__() 3372 self.mayReturnEmpty = True 3373 self.mayIndexError = False 3374 3375 3376class GoToColumn(PositionToken): 3377 """Token to advance to a specific column of input text; useful for 3378 tabular report scraping. 3379 """ 3380 3381 def __init__(self, colno: int): 3382 super().__init__() 3383 self.col = colno 3384 3385 def preParse(self, instring, loc): 3386 if col(loc, instring) != self.col: 3387 instrlen = len(instring) 3388 if self.ignoreExprs: 3389 loc = self._skipIgnorables(instring, loc) 3390 while ( 3391 loc < instrlen 3392 and instring[loc].isspace() 3393 and col(loc, instring) != self.col 3394 ): 3395 loc += 1 3396 return loc 3397 3398 def parseImpl(self, instring, loc, doActions=True): 3399 thiscol = col(loc, instring) 3400 if thiscol > self.col: 3401 raise ParseException(instring, loc, "Text not in expected column", self) 3402 newloc = loc + self.col - thiscol 3403 ret = instring[loc:newloc] 3404 return newloc, ret 3405 3406 3407class LineStart(PositionToken): 3408 r"""Matches if current position is at the beginning of a line within 3409 the parse string 3410 3411 Example:: 3412 3413 test = '''\ 3414 AAA this line 3415 AAA and this line 3416 AAA but not this one 3417 B AAA and definitely not this one 3418 ''' 3419 3420 for t in (LineStart() + 'AAA' + restOfLine).search_string(test): 3421 print(t) 3422 3423 prints:: 3424 3425 ['AAA', ' this line'] 3426 ['AAA', ' and this line'] 3427 3428 """ 3429 3430 def __init__(self): 3431 super().__init__() 3432 self.leave_whitespace() 3433 self.orig_whiteChars = set() | self.whiteChars 3434 self.whiteChars.discard("\n") 3435 self.skipper = Empty().set_whitespace_chars(self.whiteChars) 3436 self.errmsg = "Expected start of line" 3437 3438 def preParse(self, instring, loc): 3439 if loc == 0: 3440 return loc 3441 else: 3442 ret = self.skipper.preParse(instring, loc) 3443 if "\n" in self.orig_whiteChars: 3444 while instring[ret : ret + 1] == "\n": 3445 ret = self.skipper.preParse(instring, ret + 1) 3446 return ret 3447 3448 def parseImpl(self, instring, loc, doActions=True): 3449 if col(loc, instring) == 1: 3450 return loc, [] 3451 raise ParseException(instring, loc, self.errmsg, self) 3452 3453 3454class LineEnd(PositionToken): 3455 """Matches if current position is at the end of a line within the 3456 parse string 3457 """ 3458 3459 def __init__(self): 3460 super().__init__() 3461 self.whiteChars.discard("\n") 3462 self.set_whitespace_chars(self.whiteChars, copy_defaults=False) 3463 self.errmsg = "Expected end of line" 3464 3465 def parseImpl(self, instring, loc, doActions=True): 3466 if loc < len(instring): 3467 if instring[loc] == "\n": 3468 return loc + 1, "\n" 3469 else: 3470 raise ParseException(instring, loc, self.errmsg, self) 3471 elif loc == len(instring): 3472 return loc + 1, [] 3473 else: 3474 raise ParseException(instring, loc, self.errmsg, self) 3475 3476 3477class StringStart(PositionToken): 3478 """Matches if current position is at the beginning of the parse 3479 string 3480 """ 3481 3482 def __init__(self): 3483 super().__init__() 3484 self.errmsg = "Expected start of text" 3485 3486 def parseImpl(self, instring, loc, doActions=True): 3487 if loc != 0: 3488 # see if entire string up to here is just whitespace and ignoreables 3489 if loc != self.preParse(instring, 0): 3490 raise ParseException(instring, loc, self.errmsg, self) 3491 return loc, [] 3492 3493 3494class StringEnd(PositionToken): 3495 """ 3496 Matches if current position is at the end of the parse string 3497 """ 3498 3499 def __init__(self): 3500 super().__init__() 3501 self.errmsg = "Expected end of text" 3502 3503 def parseImpl(self, instring, loc, doActions=True): 3504 if loc < len(instring): 3505 raise ParseException(instring, loc, self.errmsg, self) 3506 elif loc == len(instring): 3507 return loc + 1, [] 3508 elif loc > len(instring): 3509 return loc, [] 3510 else: 3511 raise ParseException(instring, loc, self.errmsg, self) 3512 3513 3514class WordStart(PositionToken): 3515 """Matches if the current position is at the beginning of a 3516 :class:`Word`, and is not preceded by any character in a given 3517 set of ``word_chars`` (default= ``printables``). To emulate the 3518 ``\b`` behavior of regular expressions, use 3519 ``WordStart(alphanums)``. ``WordStart`` will also match at 3520 the beginning of the string being parsed, or at the beginning of 3521 a line. 3522 """ 3523 3524 def __init__(self, word_chars: str = printables, *, wordChars: str = printables): 3525 wordChars = word_chars if wordChars != printables else wordChars 3526 super().__init__() 3527 self.wordChars = set(wordChars) 3528 self.errmsg = "Not at the start of a word" 3529 3530 def parseImpl(self, instring, loc, doActions=True): 3531 if loc != 0: 3532 if ( 3533 instring[loc - 1] in self.wordChars 3534 or instring[loc] not in self.wordChars 3535 ): 3536 raise ParseException(instring, loc, self.errmsg, self) 3537 return loc, [] 3538 3539 3540class WordEnd(PositionToken): 3541 """Matches if the current position is at the end of a :class:`Word`, 3542 and is not followed by any character in a given set of ``word_chars`` 3543 (default= ``printables``). To emulate the ``\b`` behavior of 3544 regular expressions, use ``WordEnd(alphanums)``. ``WordEnd`` 3545 will also match at the end of the string being parsed, or at the end 3546 of a line. 3547 """ 3548 3549 def __init__(self, word_chars: str = printables, *, wordChars: str = printables): 3550 wordChars = word_chars if wordChars != printables else wordChars 3551 super().__init__() 3552 self.wordChars = set(wordChars) 3553 self.skipWhitespace = False 3554 self.errmsg = "Not at the end of a word" 3555 3556 def parseImpl(self, instring, loc, doActions=True): 3557 instrlen = len(instring) 3558 if instrlen > 0 and loc < instrlen: 3559 if ( 3560 instring[loc] in self.wordChars 3561 or instring[loc - 1] not in self.wordChars 3562 ): 3563 raise ParseException(instring, loc, self.errmsg, self) 3564 return loc, [] 3565 3566 3567class ParseExpression(ParserElement): 3568 """Abstract subclass of ParserElement, for combining and 3569 post-processing parsed tokens. 3570 """ 3571 3572 def __init__(self, exprs: IterableType[ParserElement], savelist: bool = False): 3573 super().__init__(savelist) 3574 self.exprs: List[ParserElement] 3575 if isinstance(exprs, _generatorType): 3576 exprs = list(exprs) 3577 3578 if isinstance(exprs, str_type): 3579 self.exprs = [self._literalStringClass(exprs)] 3580 elif isinstance(exprs, ParserElement): 3581 self.exprs = [exprs] 3582 elif isinstance(exprs, Iterable): 3583 exprs = list(exprs) 3584 # if sequence of strings provided, wrap with Literal 3585 if any(isinstance(expr, str_type) for expr in exprs): 3586 exprs = ( 3587 self._literalStringClass(e) if isinstance(e, str_type) else e 3588 for e in exprs 3589 ) 3590 self.exprs = list(exprs) 3591 else: 3592 try: 3593 self.exprs = list(exprs) 3594 except TypeError: 3595 self.exprs = [exprs] 3596 self.callPreparse = False 3597 3598 def recurse(self): 3599 return self.exprs[:] 3600 3601 def append(self, other): 3602 self.exprs.append(other) 3603 self._defaultName = None 3604 return self 3605 3606 def leave_whitespace(self, recursive=True): 3607 """ 3608 Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on 3609 all contained expressions. 3610 """ 3611 super().leave_whitespace(recursive) 3612 3613 if recursive: 3614 self.exprs = [e.copy() for e in self.exprs] 3615 for e in self.exprs: 3616 e.leave_whitespace(recursive) 3617 return self 3618 3619 def ignore_whitespace(self, recursive=True): 3620 """ 3621 Extends ``ignore_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on 3622 all contained expressions. 3623 """ 3624 super().ignore_whitespace(recursive) 3625 if recursive: 3626 self.exprs = [e.copy() for e in self.exprs] 3627 for e in self.exprs: 3628 e.ignore_whitespace(recursive) 3629 return self 3630 3631 def ignore(self, other): 3632 if isinstance(other, Suppress): 3633 if other not in self.ignoreExprs: 3634 super().ignore(other) 3635 for e in self.exprs: 3636 e.ignore(self.ignoreExprs[-1]) 3637 else: 3638 super().ignore(other) 3639 for e in self.exprs: 3640 e.ignore(self.ignoreExprs[-1]) 3641 return self 3642 3643 def _generateDefaultName(self): 3644 return "{}:({})".format(self.__class__.__name__, str(self.exprs)) 3645 3646 def streamline(self): 3647 if self.streamlined: 3648 return self 3649 3650 super().streamline() 3651 3652 for e in self.exprs: 3653 e.streamline() 3654 3655 # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)`` 3656 # but only if there are no parse actions or resultsNames on the nested And's 3657 # (likewise for :class:`Or`'s and :class:`MatchFirst`'s) 3658 if len(self.exprs) == 2: 3659 other = self.exprs[0] 3660 if ( 3661 isinstance(other, self.__class__) 3662 and not other.parseAction 3663 and other.resultsName is None 3664 and not other.debug 3665 ): 3666 self.exprs = other.exprs[:] + [self.exprs[1]] 3667 self._defaultName = None 3668 self.mayReturnEmpty |= other.mayReturnEmpty 3669 self.mayIndexError |= other.mayIndexError 3670 3671 other = self.exprs[-1] 3672 if ( 3673 isinstance(other, self.__class__) 3674 and not other.parseAction 3675 and other.resultsName is None 3676 and not other.debug 3677 ): 3678 self.exprs = self.exprs[:-1] + other.exprs[:] 3679 self._defaultName = None 3680 self.mayReturnEmpty |= other.mayReturnEmpty 3681 self.mayIndexError |= other.mayIndexError 3682 3683 self.errmsg = "Expected " + str(self) 3684 3685 return self 3686 3687 def validate(self, validateTrace=None): 3688 tmp = (validateTrace if validateTrace is not None else [])[:] + [self] 3689 for e in self.exprs: 3690 e.validate(tmp) 3691 self._checkRecursion([]) 3692 3693 def copy(self): 3694 ret = super().copy() 3695 ret.exprs = [e.copy() for e in self.exprs] 3696 return ret 3697 3698 def _setResultsName(self, name, listAllMatches=False): 3699 if ( 3700 __diag__.warn_ungrouped_named_tokens_in_collection 3701 and Diagnostics.warn_ungrouped_named_tokens_in_collection 3702 not in self.suppress_warnings_ 3703 ): 3704 for e in self.exprs: 3705 if ( 3706 isinstance(e, ParserElement) 3707 and e.resultsName 3708 and Diagnostics.warn_ungrouped_named_tokens_in_collection 3709 not in e.suppress_warnings_ 3710 ): 3711 warnings.warn( 3712 "{}: setting results name {!r} on {} expression " 3713 "collides with {!r} on contained expression".format( 3714 "warn_ungrouped_named_tokens_in_collection", 3715 name, 3716 type(self).__name__, 3717 e.resultsName, 3718 ), 3719 stacklevel=3, 3720 ) 3721 3722 return super()._setResultsName(name, listAllMatches) 3723 3724 ignoreWhitespace = ignore_whitespace 3725 leaveWhitespace = leave_whitespace 3726 3727 3728class And(ParseExpression): 3729 """ 3730 Requires all given :class:`ParseExpression` s to be found in the given order. 3731 Expressions may be separated by whitespace. 3732 May be constructed using the ``'+'`` operator. 3733 May also be constructed using the ``'-'`` operator, which will 3734 suppress backtracking. 3735 3736 Example:: 3737 3738 integer = Word(nums) 3739 name_expr = OneOrMore(Word(alphas)) 3740 3741 expr = And([integer("id"), name_expr("name"), integer("age")]) 3742 # more easily written as: 3743 expr = integer("id") + name_expr("name") + integer("age") 3744 """ 3745 3746 class _ErrorStop(Empty): 3747 def __init__(self, *args, **kwargs): 3748 super().__init__(*args, **kwargs) 3749 self.leave_whitespace() 3750 3751 def _generateDefaultName(self): 3752 return "-" 3753 3754 def __init__(self, exprs_arg: IterableType[ParserElement], savelist: bool = True): 3755 exprs: List[ParserElement] = list(exprs_arg) 3756 if exprs and Ellipsis in exprs: 3757 tmp = [] 3758 for i, expr in enumerate(exprs): 3759 if expr is Ellipsis: 3760 if i < len(exprs) - 1: 3761 skipto_arg: ParserElement = (Empty() + exprs[i + 1]).exprs[-1] 3762 tmp.append(SkipTo(skipto_arg)("_skipped*")) 3763 else: 3764 raise Exception( 3765 "cannot construct And with sequence ending in ..." 3766 ) 3767 else: 3768 tmp.append(expr) 3769 exprs[:] = tmp 3770 super().__init__(exprs, savelist) 3771 if self.exprs: 3772 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 3773 self.set_whitespace_chars( 3774 self.exprs[0].whiteChars, 3775 copy_defaults=self.exprs[0].copyDefaultWhiteChars, 3776 ) 3777 self.skipWhitespace = self.exprs[0].skipWhitespace 3778 else: 3779 self.mayReturnEmpty = True 3780 self.callPreparse = True 3781 3782 def streamline(self) -> ParserElement: 3783 # collapse any _PendingSkip's 3784 if self.exprs: 3785 if any( 3786 isinstance(e, ParseExpression) 3787 and e.exprs 3788 and isinstance(e.exprs[-1], _PendingSkip) 3789 for e in self.exprs[:-1] 3790 ): 3791 for i, e in enumerate(self.exprs[:-1]): 3792 if e is None: 3793 continue 3794 if ( 3795 isinstance(e, ParseExpression) 3796 and e.exprs 3797 and isinstance(e.exprs[-1], _PendingSkip) 3798 ): 3799 e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1] 3800 self.exprs[i + 1] = None 3801 self.exprs = [e for e in self.exprs if e is not None] 3802 3803 super().streamline() 3804 3805 # link any IndentedBlocks to the prior expression 3806 for prev, cur in zip(self.exprs, self.exprs[1:]): 3807 # traverse cur or any first embedded expr of cur looking for an IndentedBlock 3808 # (but watch out for recursive grammar) 3809 seen = set() 3810 while cur: 3811 if id(cur) in seen: 3812 break 3813 seen.add(id(cur)) 3814 if isinstance(cur, IndentedBlock): 3815 prev.add_parse_action( 3816 lambda s, l, t: setattr(cur, "parent_anchor", col(l, s)) 3817 ) 3818 break 3819 subs = cur.recurse() 3820 cur = next(iter(subs), None) 3821 3822 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 3823 return self 3824 3825 def parseImpl(self, instring, loc, doActions=True): 3826 # pass False as callPreParse arg to _parse for first element, since we already 3827 # pre-parsed the string as part of our And pre-parsing 3828 loc, resultlist = self.exprs[0]._parse( 3829 instring, loc, doActions, callPreParse=False 3830 ) 3831 errorStop = False 3832 for e in self.exprs[1:]: 3833 # if isinstance(e, And._ErrorStop): 3834 if type(e) is And._ErrorStop: 3835 errorStop = True 3836 continue 3837 if errorStop: 3838 try: 3839 loc, exprtokens = e._parse(instring, loc, doActions) 3840 except ParseSyntaxException: 3841 raise 3842 except ParseBaseException as pe: 3843 pe.__traceback__ = None 3844 raise ParseSyntaxException._from_exception(pe) 3845 except IndexError: 3846 raise ParseSyntaxException( 3847 instring, len(instring), self.errmsg, self 3848 ) 3849 else: 3850 loc, exprtokens = e._parse(instring, loc, doActions) 3851 if exprtokens or exprtokens.haskeys(): 3852 resultlist += exprtokens 3853 return loc, resultlist 3854 3855 def __iadd__(self, other): 3856 if isinstance(other, str_type): 3857 other = self._literalStringClass(other) 3858 return self.append(other) # And([self, other]) 3859 3860 def _checkRecursion(self, parseElementList): 3861 subRecCheckList = parseElementList[:] + [self] 3862 for e in self.exprs: 3863 e._checkRecursion(subRecCheckList) 3864 if not e.mayReturnEmpty: 3865 break 3866 3867 def _generateDefaultName(self): 3868 inner = " ".join(str(e) for e in self.exprs) 3869 # strip off redundant inner {}'s 3870 while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}": 3871 inner = inner[1:-1] 3872 return "{" + inner + "}" 3873 3874 3875class Or(ParseExpression): 3876 """Requires that at least one :class:`ParseExpression` is found. If 3877 two expressions match, the expression that matches the longest 3878 string will be used. May be constructed using the ``'^'`` 3879 operator. 3880 3881 Example:: 3882 3883 # construct Or using '^' operator 3884 3885 number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums)) 3886 print(number.search_string("123 3.1416 789")) 3887 3888 prints:: 3889 3890 [['123'], ['3.1416'], ['789']] 3891 """ 3892 3893 def __init__(self, exprs: IterableType[ParserElement], savelist: bool = False): 3894 super().__init__(exprs, savelist) 3895 if self.exprs: 3896 self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) 3897 self.skipWhitespace = all(e.skipWhitespace for e in self.exprs) 3898 else: 3899 self.mayReturnEmpty = True 3900 3901 def streamline(self) -> ParserElement: 3902 super().streamline() 3903 if self.exprs: 3904 self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) 3905 self.saveAsList = any(e.saveAsList for e in self.exprs) 3906 self.skipWhitespace = all(e.skipWhitespace for e in self.exprs) 3907 else: 3908 self.saveAsList = False 3909 return self 3910 3911 def parseImpl(self, instring, loc, doActions=True): 3912 maxExcLoc = -1 3913 maxException = None 3914 matches = [] 3915 fatals = [] 3916 if all(e.callPreparse for e in self.exprs): 3917 loc = self.preParse(instring, loc) 3918 for e in self.exprs: 3919 try: 3920 loc2 = e.try_parse(instring, loc, raise_fatal=True) 3921 except ParseFatalException as pfe: 3922 pfe.__traceback__ = None 3923 pfe.parserElement = e 3924 fatals.append(pfe) 3925 maxException = None 3926 maxExcLoc = -1 3927 except ParseException as err: 3928 if not fatals: 3929 err.__traceback__ = None 3930 if err.loc > maxExcLoc: 3931 maxException = err 3932 maxExcLoc = err.loc 3933 except IndexError: 3934 if len(instring) > maxExcLoc: 3935 maxException = ParseException( 3936 instring, len(instring), e.errmsg, self 3937 ) 3938 maxExcLoc = len(instring) 3939 else: 3940 # save match among all matches, to retry longest to shortest 3941 matches.append((loc2, e)) 3942 3943 if matches: 3944 # re-evaluate all matches in descending order of length of match, in case attached actions 3945 # might change whether or how much they match of the input. 3946 matches.sort(key=itemgetter(0), reverse=True) 3947 3948 if not doActions: 3949 # no further conditions or parse actions to change the selection of 3950 # alternative, so the first match will be the best match 3951 best_expr = matches[0][1] 3952 return best_expr._parse(instring, loc, doActions) 3953 3954 longest = -1, None 3955 for loc1, expr1 in matches: 3956 if loc1 <= longest[0]: 3957 # already have a longer match than this one will deliver, we are done 3958 return longest 3959 3960 try: 3961 loc2, toks = expr1._parse(instring, loc, doActions) 3962 except ParseException as err: 3963 err.__traceback__ = None 3964 if err.loc > maxExcLoc: 3965 maxException = err 3966 maxExcLoc = err.loc 3967 else: 3968 if loc2 >= loc1: 3969 return loc2, toks 3970 # didn't match as much as before 3971 elif loc2 > longest[0]: 3972 longest = loc2, toks 3973 3974 if longest != (-1, None): 3975 return longest 3976 3977 if fatals: 3978 if len(fatals) > 1: 3979 fatals.sort(key=lambda e: -e.loc) 3980 if fatals[0].loc == fatals[1].loc: 3981 fatals.sort(key=lambda e: (-e.loc, -len(str(e.parserElement)))) 3982 max_fatal = fatals[0] 3983 raise max_fatal 3984 3985 if maxException is not None: 3986 maxException.msg = self.errmsg 3987 raise maxException 3988 else: 3989 raise ParseException( 3990 instring, loc, "no defined alternatives to match", self 3991 ) 3992 3993 def __ixor__(self, other): 3994 if isinstance(other, str_type): 3995 other = self._literalStringClass(other) 3996 return self.append(other) # Or([self, other]) 3997 3998 def _generateDefaultName(self): 3999 return "{" + " ^ ".join(str(e) for e in self.exprs) + "}" 4000 4001 def _setResultsName(self, name, listAllMatches=False): 4002 if ( 4003 __diag__.warn_multiple_tokens_in_named_alternation 4004 and Diagnostics.warn_multiple_tokens_in_named_alternation 4005 not in self.suppress_warnings_ 4006 ): 4007 if any( 4008 isinstance(e, And) 4009 and Diagnostics.warn_multiple_tokens_in_named_alternation 4010 not in e.suppress_warnings_ 4011 for e in self.exprs 4012 ): 4013 warnings.warn( 4014 "{}: setting results name {!r} on {} expression " 4015 "will return a list of all parsed tokens in an And alternative, " 4016 "in prior versions only the first token was returned; enclose" 4017 "contained argument in Group".format( 4018 "warn_multiple_tokens_in_named_alternation", 4019 name, 4020 type(self).__name__, 4021 ), 4022 stacklevel=3, 4023 ) 4024 4025 return super()._setResultsName(name, listAllMatches) 4026 4027 4028class MatchFirst(ParseExpression): 4029 """Requires that at least one :class:`ParseExpression` is found. If 4030 more than one expression matches, the first one listed is the one that will 4031 match. May be constructed using the ``'|'`` operator. 4032 4033 Example:: 4034 4035 # construct MatchFirst using '|' operator 4036 4037 # watch the order of expressions to match 4038 number = Word(nums) | Combine(Word(nums) + '.' + Word(nums)) 4039 print(number.search_string("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']] 4040 4041 # put more selective expression first 4042 number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums) 4043 print(number.search_string("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']] 4044 """ 4045 4046 def __init__(self, exprs: IterableType[ParserElement], savelist: bool = False): 4047 super().__init__(exprs, savelist) 4048 if self.exprs: 4049 self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) 4050 self.skipWhitespace = all(e.skipWhitespace for e in self.exprs) 4051 else: 4052 self.mayReturnEmpty = True 4053 4054 def streamline(self) -> ParserElement: 4055 if self.streamlined: 4056 return self 4057 4058 super().streamline() 4059 if self.exprs: 4060 self.saveAsList = any(e.saveAsList for e in self.exprs) 4061 self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) 4062 self.skipWhitespace = all(e.skipWhitespace for e in self.exprs) 4063 else: 4064 self.saveAsList = False 4065 self.mayReturnEmpty = True 4066 return self 4067 4068 def parseImpl(self, instring, loc, doActions=True): 4069 maxExcLoc = -1 4070 maxException = None 4071 4072 for e in self.exprs: 4073 try: 4074 return e._parse( 4075 instring, 4076 loc, 4077 doActions, 4078 ) 4079 except ParseFatalException as pfe: 4080 pfe.__traceback__ = None 4081 pfe.parserElement = e 4082 raise 4083 except ParseException as err: 4084 if err.loc > maxExcLoc: 4085 maxException = err 4086 maxExcLoc = err.loc 4087 except IndexError: 4088 if len(instring) > maxExcLoc: 4089 maxException = ParseException( 4090 instring, len(instring), e.errmsg, self 4091 ) 4092 maxExcLoc = len(instring) 4093 4094 if maxException is not None: 4095 maxException.msg = self.errmsg 4096 raise maxException 4097 else: 4098 raise ParseException( 4099 instring, loc, "no defined alternatives to match", self 4100 ) 4101 4102 def __ior__(self, other): 4103 if isinstance(other, str_type): 4104 other = self._literalStringClass(other) 4105 return self.append(other) # MatchFirst([self, other]) 4106 4107 def _generateDefaultName(self): 4108 return "{" + " | ".join(str(e) for e in self.exprs) + "}" 4109 4110 def _setResultsName(self, name, listAllMatches=False): 4111 if ( 4112 __diag__.warn_multiple_tokens_in_named_alternation 4113 and Diagnostics.warn_multiple_tokens_in_named_alternation 4114 not in self.suppress_warnings_ 4115 ): 4116 if any( 4117 isinstance(e, And) 4118 and Diagnostics.warn_multiple_tokens_in_named_alternation 4119 not in e.suppress_warnings_ 4120 for e in self.exprs 4121 ): 4122 warnings.warn( 4123 "{}: setting results name {!r} on {} expression " 4124 "will return a list of all parsed tokens in an And alternative, " 4125 "in prior versions only the first token was returned; enclose" 4126 "contained argument in Group".format( 4127 "warn_multiple_tokens_in_named_alternation", 4128 name, 4129 type(self).__name__, 4130 ), 4131 stacklevel=3, 4132 ) 4133 4134 return super()._setResultsName(name, listAllMatches) 4135 4136 4137class Each(ParseExpression): 4138 """Requires all given :class:`ParseExpression` s to be found, but in 4139 any order. Expressions may be separated by whitespace. 4140 4141 May be constructed using the ``'&'`` operator. 4142 4143 Example:: 4144 4145 color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN") 4146 shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON") 4147 integer = Word(nums) 4148 shape_attr = "shape:" + shape_type("shape") 4149 posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn") 4150 color_attr = "color:" + color("color") 4151 size_attr = "size:" + integer("size") 4152 4153 # use Each (using operator '&') to accept attributes in any order 4154 # (shape and posn are required, color and size are optional) 4155 shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr) 4156 4157 shape_spec.run_tests(''' 4158 shape: SQUARE color: BLACK posn: 100, 120 4159 shape: CIRCLE size: 50 color: BLUE posn: 50,80 4160 color:GREEN size:20 shape:TRIANGLE posn:20,40 4161 ''' 4162 ) 4163 4164 prints:: 4165 4166 shape: SQUARE color: BLACK posn: 100, 120 4167 ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']] 4168 - color: BLACK 4169 - posn: ['100', ',', '120'] 4170 - x: 100 4171 - y: 120 4172 - shape: SQUARE 4173 4174 4175 shape: CIRCLE size: 50 color: BLUE posn: 50,80 4176 ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']] 4177 - color: BLUE 4178 - posn: ['50', ',', '80'] 4179 - x: 50 4180 - y: 80 4181 - shape: CIRCLE 4182 - size: 50 4183 4184 4185 color: GREEN size: 20 shape: TRIANGLE posn: 20,40 4186 ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']] 4187 - color: GREEN 4188 - posn: ['20', ',', '40'] 4189 - x: 20 4190 - y: 40 4191 - shape: TRIANGLE 4192 - size: 20 4193 """ 4194 4195 def __init__(self, exprs: IterableType[ParserElement], savelist: bool = True): 4196 super().__init__(exprs, savelist) 4197 if self.exprs: 4198 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 4199 else: 4200 self.mayReturnEmpty = True 4201 self.skipWhitespace = True 4202 self.initExprGroups = True 4203 self.saveAsList = True 4204 4205 def streamline(self) -> ParserElement: 4206 super().streamline() 4207 if self.exprs: 4208 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 4209 else: 4210 self.mayReturnEmpty = True 4211 return self 4212 4213 def parseImpl(self, instring, loc, doActions=True): 4214 if self.initExprGroups: 4215 self.opt1map = dict( 4216 (id(e.expr), e) for e in self.exprs if isinstance(e, Opt) 4217 ) 4218 opt1 = [e.expr for e in self.exprs if isinstance(e, Opt)] 4219 opt2 = [ 4220 e 4221 for e in self.exprs 4222 if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore)) 4223 ] 4224 self.optionals = opt1 + opt2 4225 self.multioptionals = [ 4226 e.expr.set_results_name(e.resultsName, list_all_matches=True) 4227 for e in self.exprs 4228 if isinstance(e, _MultipleMatch) 4229 ] 4230 self.multirequired = [ 4231 e.expr.set_results_name(e.resultsName, list_all_matches=True) 4232 for e in self.exprs 4233 if isinstance(e, OneOrMore) 4234 ] 4235 self.required = [ 4236 e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore)) 4237 ] 4238 self.required += self.multirequired 4239 self.initExprGroups = False 4240 4241 tmpLoc = loc 4242 tmpReqd = self.required[:] 4243 tmpOpt = self.optionals[:] 4244 multis = self.multioptionals[:] 4245 matchOrder = [] 4246 4247 keepMatching = True 4248 failed = [] 4249 fatals = [] 4250 while keepMatching: 4251 tmpExprs = tmpReqd + tmpOpt + multis 4252 failed.clear() 4253 fatals.clear() 4254 for e in tmpExprs: 4255 try: 4256 tmpLoc = e.try_parse(instring, tmpLoc, raise_fatal=True) 4257 except ParseFatalException as pfe: 4258 pfe.__traceback__ = None 4259 pfe.parserElement = e 4260 fatals.append(pfe) 4261 failed.append(e) 4262 except ParseException: 4263 failed.append(e) 4264 else: 4265 matchOrder.append(self.opt1map.get(id(e), e)) 4266 if e in tmpReqd: 4267 tmpReqd.remove(e) 4268 elif e in tmpOpt: 4269 tmpOpt.remove(e) 4270 if len(failed) == len(tmpExprs): 4271 keepMatching = False 4272 4273 # look for any ParseFatalExceptions 4274 if fatals: 4275 if len(fatals) > 1: 4276 fatals.sort(key=lambda e: -e.loc) 4277 if fatals[0].loc == fatals[1].loc: 4278 fatals.sort(key=lambda e: (-e.loc, -len(str(e.parserElement)))) 4279 max_fatal = fatals[0] 4280 raise max_fatal 4281 4282 if tmpReqd: 4283 missing = ", ".join(str(e) for e in tmpReqd) 4284 raise ParseException( 4285 instring, 4286 loc, 4287 "Missing one or more required elements ({})".format(missing), 4288 ) 4289 4290 # add any unmatched Opts, in case they have default values defined 4291 matchOrder += [e for e in self.exprs if isinstance(e, Opt) and e.expr in tmpOpt] 4292 4293 total_results = ParseResults([]) 4294 for e in matchOrder: 4295 loc, results = e._parse(instring, loc, doActions) 4296 total_results += results 4297 4298 return loc, total_results 4299 4300 def _generateDefaultName(self): 4301 return "{" + " & ".join(str(e) for e in self.exprs) + "}" 4302 4303 4304class ParseElementEnhance(ParserElement): 4305 """Abstract subclass of :class:`ParserElement`, for combining and 4306 post-processing parsed tokens. 4307 """ 4308 4309 def __init__(self, expr: Union[ParserElement, str], savelist: bool = False): 4310 super().__init__(savelist) 4311 if isinstance(expr, str_type): 4312 if issubclass(self._literalStringClass, Token): 4313 expr = self._literalStringClass(expr) 4314 elif issubclass(type(self), self._literalStringClass): 4315 expr = Literal(expr) 4316 else: 4317 expr = self._literalStringClass(Literal(expr)) 4318 self.expr = expr 4319 if expr is not None: 4320 self.mayIndexError = expr.mayIndexError 4321 self.mayReturnEmpty = expr.mayReturnEmpty 4322 self.set_whitespace_chars( 4323 expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars 4324 ) 4325 self.skipWhitespace = expr.skipWhitespace 4326 self.saveAsList = expr.saveAsList 4327 self.callPreparse = expr.callPreparse 4328 self.ignoreExprs.extend(expr.ignoreExprs) 4329 4330 def recurse(self): 4331 return [self.expr] if self.expr is not None else [] 4332 4333 def parseImpl(self, instring, loc, doActions=True): 4334 if self.expr is not None: 4335 return self.expr._parse(instring, loc, doActions, callPreParse=False) 4336 else: 4337 raise ParseException("", loc, self.errmsg, self) 4338 4339 def leave_whitespace(self, recursive=True): 4340 super().leave_whitespace(recursive) 4341 4342 if recursive: 4343 self.expr = self.expr.copy() 4344 if self.expr is not None: 4345 self.expr.leave_whitespace(recursive) 4346 return self 4347 4348 def ignore_whitespace(self, recursive=True): 4349 super().ignore_whitespace(recursive) 4350 4351 if recursive: 4352 self.expr = self.expr.copy() 4353 if self.expr is not None: 4354 self.expr.ignore_whitespace(recursive) 4355 return self 4356 4357 def ignore(self, other): 4358 if isinstance(other, Suppress): 4359 if other not in self.ignoreExprs: 4360 super().ignore(other) 4361 if self.expr is not None: 4362 self.expr.ignore(self.ignoreExprs[-1]) 4363 else: 4364 super().ignore(other) 4365 if self.expr is not None: 4366 self.expr.ignore(self.ignoreExprs[-1]) 4367 return self 4368 4369 def streamline(self): 4370 super().streamline() 4371 if self.expr is not None: 4372 self.expr.streamline() 4373 return self 4374 4375 def _checkRecursion(self, parseElementList): 4376 if self in parseElementList: 4377 raise RecursiveGrammarException(parseElementList + [self]) 4378 subRecCheckList = parseElementList[:] + [self] 4379 if self.expr is not None: 4380 self.expr._checkRecursion(subRecCheckList) 4381 4382 def validate(self, validateTrace=None): 4383 if validateTrace is None: 4384 validateTrace = [] 4385 tmp = validateTrace[:] + [self] 4386 if self.expr is not None: 4387 self.expr.validate(tmp) 4388 self._checkRecursion([]) 4389 4390 def _generateDefaultName(self): 4391 return "{}:({})".format(self.__class__.__name__, str(self.expr)) 4392 4393 ignoreWhitespace = ignore_whitespace 4394 leaveWhitespace = leave_whitespace 4395 4396 4397class IndentedBlock(ParseElementEnhance): 4398 """ 4399 Expression to match one or more expressions at a given indentation level. 4400 Useful for parsing text where structure is implied by indentation (like Python source code). 4401 """ 4402 4403 class _Indent(Empty): 4404 def __init__(self, ref_col: int): 4405 super().__init__() 4406 self.errmsg = "expected indent at column {}".format(ref_col) 4407 self.add_condition(lambda s, l, t: col(l, s) == ref_col) 4408 4409 class _IndentGreater(Empty): 4410 def __init__(self, ref_col: int): 4411 super().__init__() 4412 self.errmsg = "expected indent at column greater than {}".format(ref_col) 4413 self.add_condition(lambda s, l, t: col(l, s) > ref_col) 4414 4415 def __init__( 4416 self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True 4417 ): 4418 super().__init__(expr, savelist=True) 4419 # if recursive: 4420 # raise NotImplementedError("IndentedBlock with recursive is not implemented") 4421 self._recursive = recursive 4422 self._grouped = grouped 4423 self.parent_anchor = 1 4424 4425 def parseImpl(self, instring, loc, doActions=True): 4426 # advance parse position to non-whitespace by using an Empty() 4427 # this should be the column to be used for all subsequent indented lines 4428 anchor_loc = Empty().preParse(instring, loc) 4429 4430 # see if self.expr matches at the current location - if not it will raise an exception 4431 # and no further work is necessary 4432 self.expr.try_parse(instring, anchor_loc, doActions) 4433 4434 indent_col = col(anchor_loc, instring) 4435 peer_detect_expr = self._Indent(indent_col) 4436 4437 inner_expr = Empty() + peer_detect_expr + self.expr 4438 if self._recursive: 4439 sub_indent = self._IndentGreater(indent_col) 4440 nested_block = IndentedBlock( 4441 self.expr, recursive=self._recursive, grouped=self._grouped 4442 ) 4443 nested_block.set_debug(self.debug) 4444 nested_block.parent_anchor = indent_col 4445 inner_expr += Opt(sub_indent + nested_block) 4446 4447 inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}") 4448 block = OneOrMore(inner_expr) 4449 4450 trailing_undent = self._Indent(self.parent_anchor) | StringEnd() 4451 4452 if self._grouped: 4453 wrapper = Group 4454 else: 4455 wrapper = lambda expr: expr 4456 return (wrapper(block) + Optional(trailing_undent)).parseImpl( 4457 instring, anchor_loc, doActions 4458 ) 4459 4460 4461class AtStringStart(ParseElementEnhance): 4462 """Matches if expression matches at the beginning of the parse 4463 string:: 4464 4465 AtStringStart(Word(nums)).parse_string("123") 4466 # prints ["123"] 4467 4468 AtStringStart(Word(nums)).parse_string(" 123") 4469 # raises ParseException 4470 """ 4471 4472 def __init__(self, expr: Union[ParserElement, str]): 4473 super().__init__(expr) 4474 self.callPreparse = False 4475 4476 def parseImpl(self, instring, loc, doActions=True): 4477 if loc != 0: 4478 raise ParseException(instring, loc, "not found at string start") 4479 return super().parseImpl(instring, loc, doActions) 4480 4481 4482class AtLineStart(ParseElementEnhance): 4483 r"""Matches if an expression matches at the beginning of a line within 4484 the parse string 4485 4486 Example:: 4487 4488 test = '''\ 4489 AAA this line 4490 AAA and this line 4491 AAA but not this one 4492 B AAA and definitely not this one 4493 ''' 4494 4495 for t in (AtLineStart('AAA') + restOfLine).search_string(test): 4496 print(t) 4497 4498 prints:: 4499 4500 ['AAA', ' this line'] 4501 ['AAA', ' and this line'] 4502 4503 """ 4504 4505 def __init__(self, expr: Union[ParserElement, str]): 4506 super().__init__(expr) 4507 self.callPreparse = False 4508 4509 def parseImpl(self, instring, loc, doActions=True): 4510 if col(loc, instring) != 1: 4511 raise ParseException(instring, loc, "not found at line start") 4512 return super().parseImpl(instring, loc, doActions) 4513 4514 4515class FollowedBy(ParseElementEnhance): 4516 """Lookahead matching of the given parse expression. 4517 ``FollowedBy`` does *not* advance the parsing position within 4518 the input string, it only verifies that the specified parse 4519 expression matches at the current position. ``FollowedBy`` 4520 always returns a null token list. If any results names are defined 4521 in the lookahead expression, those *will* be returned for access by 4522 name. 4523 4524 Example:: 4525 4526 # use FollowedBy to match a label only if it is followed by a ':' 4527 data_word = Word(alphas) 4528 label = data_word + FollowedBy(':') 4529 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)) 4530 4531 OneOrMore(attr_expr).parse_string("shape: SQUARE color: BLACK posn: upper left").pprint() 4532 4533 prints:: 4534 4535 [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']] 4536 """ 4537 4538 def __init__(self, expr: Union[ParserElement, str]): 4539 super().__init__(expr) 4540 self.mayReturnEmpty = True 4541 4542 def parseImpl(self, instring, loc, doActions=True): 4543 # by using self._expr.parse and deleting the contents of the returned ParseResults list 4544 # we keep any named results that were defined in the FollowedBy expression 4545 _, ret = self.expr._parse(instring, loc, doActions=doActions) 4546 del ret[:] 4547 4548 return loc, ret 4549 4550 4551class PrecededBy(ParseElementEnhance): 4552 """Lookbehind matching of the given parse expression. 4553 ``PrecededBy`` does not advance the parsing position within the 4554 input string, it only verifies that the specified parse expression 4555 matches prior to the current position. ``PrecededBy`` always 4556 returns a null token list, but if a results name is defined on the 4557 given expression, it is returned. 4558 4559 Parameters: 4560 4561 - expr - expression that must match prior to the current parse 4562 location 4563 - retreat - (default= ``None``) - (int) maximum number of characters 4564 to lookbehind prior to the current parse location 4565 4566 If the lookbehind expression is a string, :class:`Literal`, 4567 :class:`Keyword`, or a :class:`Word` or :class:`CharsNotIn` 4568 with a specified exact or maximum length, then the retreat 4569 parameter is not required. Otherwise, retreat must be specified to 4570 give a maximum number of characters to look back from 4571 the current parse position for a lookbehind match. 4572 4573 Example:: 4574 4575 # VB-style variable names with type prefixes 4576 int_var = PrecededBy("#") + pyparsing_common.identifier 4577 str_var = PrecededBy("$") + pyparsing_common.identifier 4578 4579 """ 4580 4581 def __init__( 4582 self, expr: Union[ParserElement, str], retreat: OptionalType[int] = None 4583 ): 4584 super().__init__(expr) 4585 self.expr = self.expr().leave_whitespace() 4586 self.mayReturnEmpty = True 4587 self.mayIndexError = False 4588 self.exact = False 4589 if isinstance(expr, str_type): 4590 retreat = len(expr) 4591 self.exact = True 4592 elif isinstance(expr, (Literal, Keyword)): 4593 retreat = expr.matchLen 4594 self.exact = True 4595 elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT: 4596 retreat = expr.maxLen 4597 self.exact = True 4598 elif isinstance(expr, PositionToken): 4599 retreat = 0 4600 self.exact = True 4601 self.retreat = retreat 4602 self.errmsg = "not preceded by " + str(expr) 4603 self.skipWhitespace = False 4604 self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None))) 4605 4606 def parseImpl(self, instring, loc=0, doActions=True): 4607 if self.exact: 4608 if loc < self.retreat: 4609 raise ParseException(instring, loc, self.errmsg) 4610 start = loc - self.retreat 4611 _, ret = self.expr._parse(instring, start) 4612 else: 4613 # retreat specified a maximum lookbehind window, iterate 4614 test_expr = self.expr + StringEnd() 4615 instring_slice = instring[max(0, loc - self.retreat) : loc] 4616 last_expr = ParseException(instring, loc, self.errmsg) 4617 for offset in range(1, min(loc, self.retreat + 1) + 1): 4618 try: 4619 # print('trying', offset, instring_slice, repr(instring_slice[loc - offset:])) 4620 _, ret = test_expr._parse( 4621 instring_slice, len(instring_slice) - offset 4622 ) 4623 except ParseBaseException as pbe: 4624 last_expr = pbe 4625 else: 4626 break 4627 else: 4628 raise last_expr 4629 return loc, ret 4630 4631 4632class Located(ParseElementEnhance): 4633 """ 4634 Decorates a returned token with its starting and ending 4635 locations in the input string. 4636 4637 This helper adds the following results names: 4638 4639 - ``locn_start`` - location where matched expression begins 4640 - ``locn_end`` - location where matched expression ends 4641 - ``value`` - the actual parsed results 4642 4643 Be careful if the input text contains ``<TAB>`` characters, you 4644 may want to call :class:`ParserElement.parse_with_tabs` 4645 4646 Example:: 4647 4648 wd = Word(alphas) 4649 for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"): 4650 print(match) 4651 4652 prints:: 4653 4654 [0, ['ljsdf'], 5] 4655 [8, ['lksdjjf'], 15] 4656 [18, ['lkkjj'], 23] 4657 4658 """ 4659 4660 def parseImpl(self, instring, loc, doActions=True): 4661 start = loc 4662 loc, tokens = self.expr._parse(instring, start, doActions, callPreParse=False) 4663 ret_tokens = ParseResults([start, tokens, loc]) 4664 ret_tokens["locn_start"] = start 4665 ret_tokens["value"] = tokens 4666 ret_tokens["locn_end"] = loc 4667 if self.resultsName: 4668 # must return as a list, so that the name will be attached to the complete group 4669 return loc, [ret_tokens] 4670 else: 4671 return loc, ret_tokens 4672 4673 4674class NotAny(ParseElementEnhance): 4675 """ 4676 Lookahead to disallow matching with the given parse expression. 4677 ``NotAny`` does *not* advance the parsing position within the 4678 input string, it only verifies that the specified parse expression 4679 does *not* match at the current position. Also, ``NotAny`` does 4680 *not* skip over leading whitespace. ``NotAny`` always returns 4681 a null token list. May be constructed using the ``'~'`` operator. 4682 4683 Example:: 4684 4685 AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split()) 4686 4687 # take care not to mistake keywords for identifiers 4688 ident = ~(AND | OR | NOT) + Word(alphas) 4689 boolean_term = Opt(NOT) + ident 4690 4691 # very crude boolean expression - to support parenthesis groups and 4692 # operation hierarchy, use infix_notation 4693 boolean_expr = boolean_term + ZeroOrMore((AND | OR) + boolean_term) 4694 4695 # integers that are followed by "." are actually floats 4696 integer = Word(nums) + ~Char(".") 4697 """ 4698 4699 def __init__(self, expr: Union[ParserElement, str]): 4700 super().__init__(expr) 4701 # do NOT use self.leave_whitespace(), don't want to propagate to exprs 4702 # self.leave_whitespace() 4703 self.skipWhitespace = False 4704 4705 self.mayReturnEmpty = True 4706 self.errmsg = "Found unwanted token, " + str(self.expr) 4707 4708 def parseImpl(self, instring, loc, doActions=True): 4709 if self.expr.can_parse_next(instring, loc): 4710 raise ParseException(instring, loc, self.errmsg, self) 4711 return loc, [] 4712 4713 def _generateDefaultName(self): 4714 return "~{" + str(self.expr) + "}" 4715 4716 4717class _MultipleMatch(ParseElementEnhance): 4718 def __init__( 4719 self, 4720 expr: ParserElement, 4721 stop_on: OptionalType[Union[ParserElement, str]] = None, 4722 *, 4723 stopOn: OptionalType[Union[ParserElement, str]] = None, 4724 ): 4725 super().__init__(expr) 4726 stopOn = stopOn or stop_on 4727 self.saveAsList = True 4728 ender = stopOn 4729 if isinstance(ender, str_type): 4730 ender = self._literalStringClass(ender) 4731 self.stopOn(ender) 4732 4733 def stopOn(self, ender): 4734 if isinstance(ender, str_type): 4735 ender = self._literalStringClass(ender) 4736 self.not_ender = ~ender if ender is not None else None 4737 return self 4738 4739 def parseImpl(self, instring, loc, doActions=True): 4740 self_expr_parse = self.expr._parse 4741 self_skip_ignorables = self._skipIgnorables 4742 check_ender = self.not_ender is not None 4743 if check_ender: 4744 try_not_ender = self.not_ender.tryParse 4745 4746 # must be at least one (but first see if we are the stopOn sentinel; 4747 # if so, fail) 4748 if check_ender: 4749 try_not_ender(instring, loc) 4750 loc, tokens = self_expr_parse(instring, loc, doActions) 4751 try: 4752 hasIgnoreExprs = not not self.ignoreExprs 4753 while 1: 4754 if check_ender: 4755 try_not_ender(instring, loc) 4756 if hasIgnoreExprs: 4757 preloc = self_skip_ignorables(instring, loc) 4758 else: 4759 preloc = loc 4760 loc, tmptokens = self_expr_parse(instring, preloc, doActions) 4761 if tmptokens or tmptokens.haskeys(): 4762 tokens += tmptokens 4763 except (ParseException, IndexError): 4764 pass 4765 4766 return loc, tokens 4767 4768 def _setResultsName(self, name, listAllMatches=False): 4769 if ( 4770 __diag__.warn_ungrouped_named_tokens_in_collection 4771 and Diagnostics.warn_ungrouped_named_tokens_in_collection 4772 not in self.suppress_warnings_ 4773 ): 4774 for e in [self.expr] + self.expr.recurse(): 4775 if ( 4776 isinstance(e, ParserElement) 4777 and e.resultsName 4778 and Diagnostics.warn_ungrouped_named_tokens_in_collection 4779 not in e.suppress_warnings_ 4780 ): 4781 warnings.warn( 4782 "{}: setting results name {!r} on {} expression " 4783 "collides with {!r} on contained expression".format( 4784 "warn_ungrouped_named_tokens_in_collection", 4785 name, 4786 type(self).__name__, 4787 e.resultsName, 4788 ), 4789 stacklevel=3, 4790 ) 4791 4792 return super()._setResultsName(name, listAllMatches) 4793 4794 4795class OneOrMore(_MultipleMatch): 4796 """ 4797 Repetition of one or more of the given expression. 4798 4799 Parameters: 4800 - expr - expression that must match one or more times 4801 - stop_on - (default= ``None``) - expression for a terminating sentinel 4802 (only required if the sentinel would ordinarily match the repetition 4803 expression) 4804 4805 Example:: 4806 4807 data_word = Word(alphas) 4808 label = data_word + FollowedBy(':') 4809 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join)) 4810 4811 text = "shape: SQUARE posn: upper left color: BLACK" 4812 OneOrMore(attr_expr).parse_string(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']] 4813 4814 # use stop_on attribute for OneOrMore to avoid reading label string as part of the data 4815 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)) 4816 OneOrMore(attr_expr).parse_string(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']] 4817 4818 # could also be written as 4819 (attr_expr * (1,)).parse_string(text).pprint() 4820 """ 4821 4822 def _generateDefaultName(self): 4823 return "{" + str(self.expr) + "}..." 4824 4825 4826class ZeroOrMore(_MultipleMatch): 4827 """ 4828 Optional repetition of zero or more of the given expression. 4829 4830 Parameters: 4831 - ``expr`` - expression that must match zero or more times 4832 - ``stop_on`` - expression for a terminating sentinel 4833 (only required if the sentinel would ordinarily match the repetition 4834 expression) - (default= ``None``) 4835 4836 Example: similar to :class:`OneOrMore` 4837 """ 4838 4839 def __init__( 4840 self, 4841 expr: ParserElement, 4842 stop_on: OptionalType[Union[ParserElement, str]] = None, 4843 *, 4844 stopOn: OptionalType[Union[ParserElement, str]] = None, 4845 ): 4846 super().__init__(expr, stopOn=stopOn or stop_on) 4847 self.mayReturnEmpty = True 4848 4849 def parseImpl(self, instring, loc, doActions=True): 4850 try: 4851 return super().parseImpl(instring, loc, doActions) 4852 except (ParseException, IndexError): 4853 return loc, ParseResults([], name=self.resultsName) 4854 4855 def _generateDefaultName(self): 4856 return "[" + str(self.expr) + "]..." 4857 4858 4859class _NullToken: 4860 def __bool__(self): 4861 return False 4862 4863 def __str__(self): 4864 return "" 4865 4866 4867class Opt(ParseElementEnhance): 4868 """ 4869 Optional matching of the given expression. 4870 4871 Parameters: 4872 - ``expr`` - expression that must match zero or more times 4873 - ``default`` (optional) - value to be returned if the optional expression is not found. 4874 4875 Example:: 4876 4877 # US postal code can be a 5-digit zip, plus optional 4-digit qualifier 4878 zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4))) 4879 zip.run_tests(''' 4880 # traditional ZIP code 4881 12345 4882 4883 # ZIP+4 form 4884 12101-0001 4885 4886 # invalid ZIP 4887 98765- 4888 ''') 4889 4890 prints:: 4891 4892 # traditional ZIP code 4893 12345 4894 ['12345'] 4895 4896 # ZIP+4 form 4897 12101-0001 4898 ['12101-0001'] 4899 4900 # invalid ZIP 4901 98765- 4902 ^ 4903 FAIL: Expected end of text (at char 5), (line:1, col:6) 4904 """ 4905 4906 __optionalNotMatched = _NullToken() 4907 4908 def __init__( 4909 self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched 4910 ): 4911 super().__init__(expr, savelist=False) 4912 self.saveAsList = self.expr.saveAsList 4913 self.defaultValue = default 4914 self.mayReturnEmpty = True 4915 4916 def parseImpl(self, instring, loc, doActions=True): 4917 self_expr = self.expr 4918 try: 4919 loc, tokens = self_expr._parse(instring, loc, doActions, callPreParse=False) 4920 except (ParseException, IndexError): 4921 default_value = self.defaultValue 4922 if default_value is not self.__optionalNotMatched: 4923 if self_expr.resultsName: 4924 tokens = ParseResults([default_value]) 4925 tokens[self_expr.resultsName] = default_value 4926 else: 4927 tokens = [default_value] 4928 else: 4929 tokens = [] 4930 return loc, tokens 4931 4932 def _generateDefaultName(self): 4933 inner = str(self.expr) 4934 # strip off redundant inner {}'s 4935 while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}": 4936 inner = inner[1:-1] 4937 return "[" + inner + "]" 4938 4939 4940Optional = Opt 4941 4942 4943class SkipTo(ParseElementEnhance): 4944 """ 4945 Token for skipping over all undefined text until the matched 4946 expression is found. 4947 4948 Parameters: 4949 - ``expr`` - target expression marking the end of the data to be skipped 4950 - ``include`` - if ``True``, the target expression is also parsed 4951 (the skipped text and target expression are returned as a 2-element 4952 list) (default= ``False``). 4953 - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and 4954 comments) that might contain false matches to the target expression 4955 - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be 4956 included in the skipped test; if found before the target expression is found, 4957 the :class:`SkipTo` is not a match 4958 4959 Example:: 4960 4961 report = ''' 4962 Outstanding Issues Report - 1 Jan 2000 4963 4964 # | Severity | Description | Days Open 4965 -----+----------+-------------------------------------------+----------- 4966 101 | Critical | Intermittent system crash | 6 4967 94 | Cosmetic | Spelling error on Login ('log|n') | 14 4968 79 | Minor | System slow when running too many reports | 47 4969 ''' 4970 integer = Word(nums) 4971 SEP = Suppress('|') 4972 # use SkipTo to simply match everything up until the next SEP 4973 # - ignore quoted strings, so that a '|' character inside a quoted string does not match 4974 # - parse action will call token.strip() for each matched token, i.e., the description body 4975 string_data = SkipTo(SEP, ignore=quoted_string) 4976 string_data.set_parse_action(token_map(str.strip)) 4977 ticket_expr = (integer("issue_num") + SEP 4978 + string_data("sev") + SEP 4979 + string_data("desc") + SEP 4980 + integer("days_open")) 4981 4982 for tkt in ticket_expr.search_string(report): 4983 print tkt.dump() 4984 4985 prints:: 4986 4987 ['101', 'Critical', 'Intermittent system crash', '6'] 4988 - days_open: 6 4989 - desc: Intermittent system crash 4990 - issue_num: 101 4991 - sev: Critical 4992 ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14'] 4993 - days_open: 14 4994 - desc: Spelling error on Login ('log|n') 4995 - issue_num: 94 4996 - sev: Cosmetic 4997 ['79', 'Minor', 'System slow when running too many reports', '47'] 4998 - days_open: 47 4999 - desc: System slow when running too many reports 5000 - issue_num: 79 5001 - sev: Minor 5002 """ 5003 5004 def __init__( 5005 self, 5006 other: Union[ParserElement, str], 5007 include: bool = False, 5008 ignore: bool = None, 5009 fail_on: OptionalType[Union[ParserElement, str]] = None, 5010 *, 5011 failOn: Union[ParserElement, str] = None, 5012 ): 5013 super().__init__(other) 5014 failOn = failOn or fail_on 5015 self.ignoreExpr = ignore 5016 self.mayReturnEmpty = True 5017 self.mayIndexError = False 5018 self.includeMatch = include 5019 self.saveAsList = False 5020 if isinstance(failOn, str_type): 5021 self.failOn = self._literalStringClass(failOn) 5022 else: 5023 self.failOn = failOn 5024 self.errmsg = "No match found for " + str(self.expr) 5025 5026 def parseImpl(self, instring, loc, doActions=True): 5027 startloc = loc 5028 instrlen = len(instring) 5029 self_expr_parse = self.expr._parse 5030 self_failOn_canParseNext = ( 5031 self.failOn.canParseNext if self.failOn is not None else None 5032 ) 5033 self_ignoreExpr_tryParse = ( 5034 self.ignoreExpr.tryParse if self.ignoreExpr is not None else None 5035 ) 5036 5037 tmploc = loc 5038 while tmploc <= instrlen: 5039 if self_failOn_canParseNext is not None: 5040 # break if failOn expression matches 5041 if self_failOn_canParseNext(instring, tmploc): 5042 break 5043 5044 if self_ignoreExpr_tryParse is not None: 5045 # advance past ignore expressions 5046 while 1: 5047 try: 5048 tmploc = self_ignoreExpr_tryParse(instring, tmploc) 5049 except ParseBaseException: 5050 break 5051 5052 try: 5053 self_expr_parse(instring, tmploc, doActions=False, callPreParse=False) 5054 except (ParseException, IndexError): 5055 # no match, advance loc in string 5056 tmploc += 1 5057 else: 5058 # matched skipto expr, done 5059 break 5060 5061 else: 5062 # ran off the end of the input string without matching skipto expr, fail 5063 raise ParseException(instring, loc, self.errmsg, self) 5064 5065 # build up return values 5066 loc = tmploc 5067 skiptext = instring[startloc:loc] 5068 skipresult = ParseResults(skiptext) 5069 5070 if self.includeMatch: 5071 loc, mat = self_expr_parse(instring, loc, doActions, callPreParse=False) 5072 skipresult += mat 5073 5074 return loc, skipresult 5075 5076 5077class Forward(ParseElementEnhance): 5078 """ 5079 Forward declaration of an expression to be defined later - 5080 used for recursive grammars, such as algebraic infix notation. 5081 When the expression is known, it is assigned to the ``Forward`` 5082 variable using the ``'<<'`` operator. 5083 5084 Note: take care when assigning to ``Forward`` not to overlook 5085 precedence of operators. 5086 5087 Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that:: 5088 5089 fwd_expr << a | b | c 5090 5091 will actually be evaluated as:: 5092 5093 (fwd_expr << a) | b | c 5094 5095 thereby leaving b and c out as parseable alternatives. It is recommended that you 5096 explicitly group the values inserted into the ``Forward``:: 5097 5098 fwd_expr << (a | b | c) 5099 5100 Converting to use the ``'<<='`` operator instead will avoid this problem. 5101 5102 See :class:`ParseResults.pprint` for an example of a recursive 5103 parser created using ``Forward``. 5104 """ 5105 5106 def __init__(self, other: OptionalType[Union[ParserElement, str]] = None): 5107 self.caller_frame = traceback.extract_stack(limit=2)[0] 5108 super().__init__(other, savelist=False) 5109 self.lshift_line = None 5110 5111 def __lshift__(self, other): 5112 if hasattr(self, "caller_frame"): 5113 del self.caller_frame 5114 if isinstance(other, str_type): 5115 other = self._literalStringClass(other) 5116 self.expr = other 5117 self.mayIndexError = self.expr.mayIndexError 5118 self.mayReturnEmpty = self.expr.mayReturnEmpty 5119 self.set_whitespace_chars( 5120 self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars 5121 ) 5122 self.skipWhitespace = self.expr.skipWhitespace 5123 self.saveAsList = self.expr.saveAsList 5124 self.ignoreExprs.extend(self.expr.ignoreExprs) 5125 self.lshift_line = traceback.extract_stack(limit=2)[-2] 5126 return self 5127 5128 def __ilshift__(self, other): 5129 return self << other 5130 5131 def __or__(self, other): 5132 caller_line = traceback.extract_stack(limit=2)[-2] 5133 if ( 5134 __diag__.warn_on_match_first_with_lshift_operator 5135 and caller_line == self.lshift_line 5136 and Diagnostics.warn_on_match_first_with_lshift_operator 5137 not in self.suppress_warnings_ 5138 ): 5139 warnings.warn( 5140 "using '<<' operator with '|' is probably an error, use '<<='", 5141 stacklevel=2, 5142 ) 5143 ret = super().__or__(other) 5144 return ret 5145 5146 def __del__(self): 5147 # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<' 5148 if ( 5149 self.expr is None 5150 and __diag__.warn_on_assignment_to_Forward 5151 and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_ 5152 ): 5153 warnings.warn_explicit( 5154 "Forward defined here but no expression attached later using '<<=' or '<<'", 5155 UserWarning, 5156 filename=self.caller_frame.filename, 5157 lineno=self.caller_frame.lineno, 5158 ) 5159 5160 def parseImpl(self, instring, loc, doActions=True): 5161 if ( 5162 self.expr is None 5163 and __diag__.warn_on_parse_using_empty_Forward 5164 and Diagnostics.warn_on_parse_using_empty_Forward 5165 not in self.suppress_warnings_ 5166 ): 5167 # walk stack until parse_string, scan_string, search_string, or transform_string is found 5168 parse_fns = [ 5169 "parse_string", 5170 "scan_string", 5171 "search_string", 5172 "transform_string", 5173 ] 5174 tb = traceback.extract_stack(limit=200) 5175 for i, frm in enumerate(reversed(tb), start=1): 5176 if frm.name in parse_fns: 5177 stacklevel = i + 1 5178 break 5179 else: 5180 stacklevel = 2 5181 warnings.warn( 5182 "Forward expression was never assigned a value, will not parse any input", 5183 stacklevel=stacklevel, 5184 ) 5185 if not ParserElement._left_recursion_enabled: 5186 return super().parseImpl(instring, loc, doActions) 5187 # ## Bounded Recursion algorithm ## 5188 # Recursion only needs to be processed at ``Forward`` elements, since they are 5189 # the only ones that can actually refer to themselves. The general idea is 5190 # to handle recursion stepwise: We start at no recursion, then recurse once, 5191 # recurse twice, ..., until more recursion offers no benefit (we hit the bound). 5192 # 5193 # The "trick" here is that each ``Forward`` gets evaluated in two contexts 5194 # - to *match* a specific recursion level, and 5195 # - to *search* the bounded recursion level 5196 # and the two run concurrently. The *search* must *match* each recursion level 5197 # to find the best possible match. This is handled by a memo table, which 5198 # provides the previous match to the next level match attempt. 5199 # 5200 # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al. 5201 # 5202 # There is a complication since we not only *parse* but also *transform* via 5203 # actions: We do not want to run the actions too often while expanding. Thus, 5204 # we expand using `doActions=False` and only run `doActions=True` if the next 5205 # recursion level is acceptable. 5206 with ParserElement.recursion_lock: 5207 memo = ParserElement.recursion_memos 5208 try: 5209 # we are parsing at a specific recursion expansion - use it as-is 5210 prev_loc, prev_result = memo[loc, self, doActions] 5211 if isinstance(prev_result, Exception): 5212 raise prev_result 5213 return prev_loc, prev_result.copy() 5214 except KeyError: 5215 act_key = (loc, self, True) 5216 peek_key = (loc, self, False) 5217 # we are searching for the best recursion expansion - keep on improving 5218 # both `doActions` cases must be tracked separately here! 5219 prev_loc, prev_peek = memo[peek_key] = ( 5220 loc - 1, 5221 ParseException( 5222 instring, loc, "Forward recursion without base case", self 5223 ), 5224 ) 5225 if doActions: 5226 memo[act_key] = memo[peek_key] 5227 while True: 5228 try: 5229 new_loc, new_peek = super().parseImpl(instring, loc, False) 5230 except ParseException: 5231 # we failed before getting any match – do not hide the error 5232 if isinstance(prev_peek, Exception): 5233 raise 5234 new_loc, new_peek = prev_loc, prev_peek 5235 # the match did not get better: we are done 5236 if new_loc <= prev_loc: 5237 if doActions: 5238 # replace the match for doActions=False as well, 5239 # in case the action did backtrack 5240 prev_loc, prev_result = memo[peek_key] = memo[act_key] 5241 del memo[peek_key], memo[act_key] 5242 return prev_loc, prev_result.copy() 5243 del memo[peek_key] 5244 return prev_loc, prev_peek.copy() 5245 # the match did get better: see if we can improve further 5246 else: 5247 if doActions: 5248 try: 5249 memo[act_key] = super().parseImpl(instring, loc, True) 5250 except ParseException as e: 5251 memo[peek_key] = memo[act_key] = (new_loc, e) 5252 raise 5253 prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek 5254 5255 def leave_whitespace(self, recursive=True): 5256 self.skipWhitespace = False 5257 return self 5258 5259 def ignore_whitespace(self, recursive=True): 5260 self.skipWhitespace = True 5261 return self 5262 5263 def streamline(self): 5264 if not self.streamlined: 5265 self.streamlined = True 5266 if self.expr is not None: 5267 self.expr.streamline() 5268 return self 5269 5270 def validate(self, validateTrace=None): 5271 if validateTrace is None: 5272 validateTrace = [] 5273 5274 if self not in validateTrace: 5275 tmp = validateTrace[:] + [self] 5276 if self.expr is not None: 5277 self.expr.validate(tmp) 5278 self._checkRecursion([]) 5279 5280 def _generateDefaultName(self): 5281 # Avoid infinite recursion by setting a temporary _defaultName 5282 self._defaultName = ": ..." 5283 5284 # Use the string representation of main expression. 5285 retString = "..." 5286 try: 5287 if self.expr is not None: 5288 retString = str(self.expr)[:1000] 5289 else: 5290 retString = "None" 5291 finally: 5292 return self.__class__.__name__ + ": " + retString 5293 5294 def copy(self): 5295 if self.expr is not None: 5296 return super().copy() 5297 else: 5298 ret = Forward() 5299 ret <<= self 5300 return ret 5301 5302 def _setResultsName(self, name, list_all_matches=False): 5303 if ( 5304 __diag__.warn_name_set_on_empty_Forward 5305 and Diagnostics.warn_name_set_on_empty_Forward 5306 not in self.suppress_warnings_ 5307 ): 5308 if self.expr is None: 5309 warnings.warn( 5310 "{}: setting results name {!r} on {} expression " 5311 "that has no contained expression".format( 5312 "warn_name_set_on_empty_Forward", name, type(self).__name__ 5313 ), 5314 stacklevel=3, 5315 ) 5316 5317 return super()._setResultsName(name, list_all_matches) 5318 5319 ignoreWhitespace = ignore_whitespace 5320 leaveWhitespace = leave_whitespace 5321 5322 5323class TokenConverter(ParseElementEnhance): 5324 """ 5325 Abstract subclass of :class:`ParseExpression`, for converting parsed results. 5326 """ 5327 5328 def __init__(self, expr: Union[ParserElement, str], savelist=False): 5329 super().__init__(expr) # , savelist) 5330 self.saveAsList = False 5331 5332 5333class Combine(TokenConverter): 5334 """Converter to concatenate all matching tokens to a single string. 5335 By default, the matching patterns must also be contiguous in the 5336 input string; this can be disabled by specifying 5337 ``'adjacent=False'`` in the constructor. 5338 5339 Example:: 5340 5341 real = Word(nums) + '.' + Word(nums) 5342 print(real.parse_string('3.1416')) # -> ['3', '.', '1416'] 5343 # will also erroneously match the following 5344 print(real.parse_string('3. 1416')) # -> ['3', '.', '1416'] 5345 5346 real = Combine(Word(nums) + '.' + Word(nums)) 5347 print(real.parse_string('3.1416')) # -> ['3.1416'] 5348 # no match when there are internal spaces 5349 print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...) 5350 """ 5351 5352 def __init__( 5353 self, 5354 expr: ParserElement, 5355 join_string: str = "", 5356 adjacent: bool = True, 5357 *, 5358 joinString: OptionalType[str] = None, 5359 ): 5360 super().__init__(expr) 5361 joinString = joinString if joinString is not None else join_string 5362 # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself 5363 if adjacent: 5364 self.leave_whitespace() 5365 self.adjacent = adjacent 5366 self.skipWhitespace = True 5367 self.joinString = joinString 5368 self.callPreparse = True 5369 5370 def ignore(self, other): 5371 if self.adjacent: 5372 ParserElement.ignore(self, other) 5373 else: 5374 super().ignore(other) 5375 return self 5376 5377 def postParse(self, instring, loc, tokenlist): 5378 retToks = tokenlist.copy() 5379 del retToks[:] 5380 retToks += ParseResults( 5381 ["".join(tokenlist._asStringList(self.joinString))], modal=self.modalResults 5382 ) 5383 5384 if self.resultsName and retToks.haskeys(): 5385 return [retToks] 5386 else: 5387 return retToks 5388 5389 5390class Group(TokenConverter): 5391 """Converter to return the matched tokens as a list - useful for 5392 returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions. 5393 5394 The optional ``aslist`` argument when set to True will return the 5395 parsed tokens as a Python list instead of a pyparsing ParseResults. 5396 5397 Example:: 5398 5399 ident = Word(alphas) 5400 num = Word(nums) 5401 term = ident | num 5402 func = ident + Opt(delimited_list(term)) 5403 print(func.parse_string("fn a, b, 100")) 5404 # -> ['fn', 'a', 'b', '100'] 5405 5406 func = ident + Group(Opt(delimited_list(term))) 5407 print(func.parse_string("fn a, b, 100")) 5408 # -> ['fn', ['a', 'b', '100']] 5409 """ 5410 5411 def __init__(self, expr: ParserElement, aslist: bool = False): 5412 super().__init__(expr) 5413 self.saveAsList = True 5414 self._asPythonList = aslist 5415 5416 def postParse(self, instring, loc, tokenlist): 5417 if self._asPythonList: 5418 return ParseResults.List( 5419 tokenlist.asList() 5420 if isinstance(tokenlist, ParseResults) 5421 else list(tokenlist) 5422 ) 5423 else: 5424 return [tokenlist] 5425 5426 5427class Dict(TokenConverter): 5428 """Converter to return a repetitive expression as a list, but also 5429 as a dictionary. Each element can also be referenced using the first 5430 token in the expression as its key. Useful for tabular report 5431 scraping when the first column can be used as a item key. 5432 5433 The optional ``asdict`` argument when set to True will return the 5434 parsed tokens as a Python dict instead of a pyparsing ParseResults. 5435 5436 Example:: 5437 5438 data_word = Word(alphas) 5439 label = data_word + FollowedBy(':') 5440 5441 text = "shape: SQUARE posn: upper left color: light blue texture: burlap" 5442 attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)) 5443 5444 # print attributes as plain groups 5445 print(OneOrMore(attr_expr).parse_string(text).dump()) 5446 5447 # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names 5448 result = Dict(OneOrMore(Group(attr_expr))).parse_string(text) 5449 print(result.dump()) 5450 5451 # access named fields as dict entries, or output as dict 5452 print(result['shape']) 5453 print(result.as_dict()) 5454 5455 prints:: 5456 5457 ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap'] 5458 [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']] 5459 - color: light blue 5460 - posn: upper left 5461 - shape: SQUARE 5462 - texture: burlap 5463 SQUARE 5464 {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'} 5465 5466 See more examples at :class:`ParseResults` of accessing fields by results name. 5467 """ 5468 5469 def __init__(self, expr: ParserElement, asdict: bool = False): 5470 super().__init__(expr) 5471 self.saveAsList = True 5472 self._asPythonDict = asdict 5473 5474 def postParse(self, instring, loc, tokenlist): 5475 for i, tok in enumerate(tokenlist): 5476 if len(tok) == 0: 5477 continue 5478 5479 ikey = tok[0] 5480 if isinstance(ikey, int): 5481 ikey = str(ikey).strip() 5482 5483 if len(tok) == 1: 5484 tokenlist[ikey] = _ParseResultsWithOffset("", i) 5485 5486 elif len(tok) == 2 and not isinstance(tok[1], ParseResults): 5487 tokenlist[ikey] = _ParseResultsWithOffset(tok[1], i) 5488 5489 else: 5490 try: 5491 dictvalue = tok.copy() # ParseResults(i) 5492 except Exception: 5493 exc = TypeError( 5494 "could not extract dict values from parsed results" 5495 " - Dict expression must contain Grouped expressions" 5496 ) 5497 raise exc from None 5498 5499 del dictvalue[0] 5500 5501 if len(dictvalue) != 1 or ( 5502 isinstance(dictvalue, ParseResults) and dictvalue.haskeys() 5503 ): 5504 tokenlist[ikey] = _ParseResultsWithOffset(dictvalue, i) 5505 else: 5506 tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0], i) 5507 5508 if self._asPythonDict: 5509 return [tokenlist.as_dict()] if self.resultsName else tokenlist.as_dict() 5510 else: 5511 return [tokenlist] if self.resultsName else tokenlist 5512 5513 5514class Suppress(TokenConverter): 5515 """Converter for ignoring the results of a parsed expression. 5516 5517 Example:: 5518 5519 source = "a, b, c,d" 5520 wd = Word(alphas) 5521 wd_list1 = wd + ZeroOrMore(',' + wd) 5522 print(wd_list1.parse_string(source)) 5523 5524 # often, delimiters that are useful during parsing are just in the 5525 # way afterward - use Suppress to keep them out of the parsed output 5526 wd_list2 = wd + ZeroOrMore(Suppress(',') + wd) 5527 print(wd_list2.parse_string(source)) 5528 5529 # Skipped text (using '...') can be suppressed as well 5530 source = "lead in START relevant text END trailing text" 5531 start_marker = Keyword("START") 5532 end_marker = Keyword("END") 5533 find_body = Suppress(...) + start_marker + ... + end_marker 5534 print(find_body.parse_string(source) 5535 5536 prints:: 5537 5538 ['a', ',', 'b', ',', 'c', ',', 'd'] 5539 ['a', 'b', 'c', 'd'] 5540 ['START', 'relevant text ', 'END'] 5541 5542 (See also :class:`delimited_list`.) 5543 """ 5544 5545 def __init__(self, expr: Union[ParserElement, str], savelist: bool = False): 5546 if expr is ...: 5547 expr = _PendingSkip(NoMatch()) 5548 super().__init__(expr) 5549 5550 def __add__(self, other): 5551 if isinstance(self.expr, _PendingSkip): 5552 return Suppress(SkipTo(other)) + other 5553 else: 5554 return super().__add__(other) 5555 5556 def __sub__(self, other): 5557 if isinstance(self.expr, _PendingSkip): 5558 return Suppress(SkipTo(other)) - other 5559 else: 5560 return super().__sub__(other) 5561 5562 def postParse(self, instring, loc, tokenlist): 5563 return [] 5564 5565 def suppress(self): 5566 return self 5567 5568 5569def trace_parse_action(f: ParseAction): 5570 """Decorator for debugging parse actions. 5571 5572 When the parse action is called, this decorator will print 5573 ``">> entering method-name(line:<current_source_line>, <parse_location>, <matched_tokens>)"``. 5574 When the parse action completes, the decorator will print 5575 ``"<<"`` followed by the returned value, or any exception that the parse action raised. 5576 5577 Example:: 5578 5579 wd = Word(alphas) 5580 5581 @trace_parse_action 5582 def remove_duplicate_chars(tokens): 5583 return ''.join(sorted(set(''.join(tokens)))) 5584 5585 wds = OneOrMore(wd).set_parse_action(remove_duplicate_chars) 5586 print(wds.parse_string("slkdjs sld sldd sdlf sdljf")) 5587 5588 prints:: 5589 5590 >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {})) 5591 <<leaving remove_duplicate_chars (ret: 'dfjkls') 5592 ['dfjkls'] 5593 """ 5594 f = _trim_arity(f) 5595 5596 def z(*paArgs): 5597 thisFunc = f.__name__ 5598 s, l, t = paArgs[-3:] 5599 if len(paArgs) > 3: 5600 thisFunc = paArgs[0].__class__.__name__ + "." + thisFunc 5601 sys.stderr.write( 5602 ">>entering {}(line: {!r}, {}, {!r})\n".format(thisFunc, line(l, s), l, t) 5603 ) 5604 try: 5605 ret = f(*paArgs) 5606 except Exception as exc: 5607 sys.stderr.write("<<leaving {} (exception: {})\n".format(thisFunc, exc)) 5608 raise 5609 sys.stderr.write("<<leaving {} (ret: {!r})\n".format(thisFunc, ret)) 5610 return ret 5611 5612 z.__name__ = f.__name__ 5613 return z 5614 5615 5616# convenience constants for positional expressions 5617empty = Empty().set_name("empty") 5618line_start = LineStart().set_name("line_start") 5619line_end = LineEnd().set_name("line_end") 5620string_start = StringStart().set_name("string_start") 5621string_end = StringEnd().set_name("string_end") 5622 5623_escapedPunc = Word(_bslash, r"\[]-*.$+^?()~ ", exact=2).set_parse_action( 5624 lambda s, l, t: t[0][1] 5625) 5626_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action( 5627 lambda s, l, t: chr(int(t[0].lstrip(r"\0x"), 16)) 5628) 5629_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action( 5630 lambda s, l, t: chr(int(t[0][1:], 8)) 5631) 5632_singleChar = ( 5633 _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1) 5634) 5635_charRange = Group(_singleChar + Suppress("-") + _singleChar) 5636_reBracketExpr = ( 5637 Literal("[") 5638 + Opt("^").set_results_name("negate") 5639 + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body") 5640 + "]" 5641) 5642 5643 5644def srange(s): 5645 r"""Helper to easily define string ranges for use in :class:`Word` 5646 construction. Borrows syntax from regexp ``'[]'`` string range 5647 definitions:: 5648 5649 srange("[0-9]") -> "0123456789" 5650 srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz" 5651 srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_" 5652 5653 The input string must be enclosed in []'s, and the returned string 5654 is the expanded character set joined into a single string. The 5655 values enclosed in the []'s may be: 5656 5657 - a single character 5658 - an escaped character with a leading backslash (such as ``\-`` 5659 or ``\]``) 5660 - an escaped hex character with a leading ``'\x'`` 5661 (``\x21``, which is a ``'!'`` character) (``\0x##`` 5662 is also supported for backwards compatibility) 5663 - an escaped octal character with a leading ``'\0'`` 5664 (``\041``, which is a ``'!'`` character) 5665 - a range of any of the above, separated by a dash (``'a-z'``, 5666 etc.) 5667 - any combination of the above (``'aeiouy'``, 5668 ``'a-zA-Z0-9_$'``, etc.) 5669 """ 5670 _expanded = ( 5671 lambda p: p 5672 if not isinstance(p, ParseResults) 5673 else "".join(chr(c) for c in range(ord(p[0]), ord(p[1]) + 1)) 5674 ) 5675 try: 5676 return "".join(_expanded(part) for part in _reBracketExpr.parse_string(s).body) 5677 except Exception: 5678 return "" 5679 5680 5681def token_map(func, *args): 5682 """Helper to define a parse action by mapping a function to all 5683 elements of a :class:`ParseResults` list. If any additional args are passed, 5684 they are forwarded to the given function as additional arguments 5685 after the token, as in 5686 ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``, 5687 which will convert the parsed data to an integer using base 16. 5688 5689 Example (compare the last to example in :class:`ParserElement.transform_string`:: 5690 5691 hex_ints = OneOrMore(Word(hexnums)).set_parse_action(token_map(int, 16)) 5692 hex_ints.run_tests(''' 5693 00 11 22 aa FF 0a 0d 1a 5694 ''') 5695 5696 upperword = Word(alphas).set_parse_action(token_map(str.upper)) 5697 OneOrMore(upperword).run_tests(''' 5698 my kingdom for a horse 5699 ''') 5700 5701 wd = Word(alphas).set_parse_action(token_map(str.title)) 5702 OneOrMore(wd).set_parse_action(' '.join).run_tests(''' 5703 now is the winter of our discontent made glorious summer by this sun of york 5704 ''') 5705 5706 prints:: 5707 5708 00 11 22 aa FF 0a 0d 1a 5709 [0, 17, 34, 170, 255, 10, 13, 26] 5710 5711 my kingdom for a horse 5712 ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE'] 5713 5714 now is the winter of our discontent made glorious summer by this sun of york 5715 ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York'] 5716 """ 5717 5718 def pa(s, l, t): 5719 return [func(tokn, *args) for tokn in t] 5720 5721 func_name = getattr(func, "__name__", getattr(func, "__class__").__name__) 5722 pa.__name__ = func_name 5723 5724 return pa 5725 5726 5727def autoname_elements(): 5728 """ 5729 Utility to simplify mass-naming of parser elements, for 5730 generating railroad diagram with named subdiagrams. 5731 """ 5732 for name, var in sys._getframe().f_back.f_locals.items(): 5733 if isinstance(var, ParserElement) and not var.customName: 5734 var.set_name(name) 5735 5736 5737dbl_quoted_string = Combine( 5738 Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' 5739).set_name("string enclosed in double quotes") 5740 5741sgl_quoted_string = Combine( 5742 Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'" 5743).set_name("string enclosed in single quotes") 5744 5745quoted_string = Combine( 5746 Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' 5747 | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'" 5748).set_name("quotedString using single or double quotes") 5749 5750unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal") 5751 5752 5753alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") 5754punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") 5755 5756# build list of built-in expressions, for future reference if a global default value 5757# gets updated 5758_builtin_exprs = [v for v in vars().values() if isinstance(v, ParserElement)] 5759 5760# backward compatibility names 5761tokenMap = token_map 5762conditionAsParseAction = condition_as_parse_action 5763nullDebugAction = null_debug_action 5764sglQuotedString = sgl_quoted_string 5765dblQuotedString = dbl_quoted_string 5766quotedString = quoted_string 5767unicodeString = unicode_string 5768lineStart = line_start 5769lineEnd = line_end 5770stringStart = string_start 5771stringEnd = string_end 5772traceParseAction = trace_parse_action 5773