1#
2# core.py
3#
4import os
5from typing import (
6    Optional as OptionalType,
7    Iterable as IterableType,
8    Union,
9    Callable,
10    Any,
11    Generator,
12    Tuple,
13    List,
14    TextIO,
15    Set,
16    Dict as DictType,
17)
18from abc import ABC, abstractmethod
19from enum import Enum
20import string
21import copy
22import warnings
23import re
24import sre_constants
25import sys
26from collections.abc import Iterable
27import traceback
28import types
29from operator import itemgetter
30from functools import wraps
31from threading import RLock
32from pathlib import Path
33
34from .util import (
35    _FifoCache,
36    _UnboundedCache,
37    __config_flags,
38    _collapse_string_to_ranges,
39    _escape_regex_range_chars,
40    _bslash,
41    _flatten,
42    LRUMemo as _LRUMemo,
43    UnboundedMemo as _UnboundedMemo,
44)
45from .exceptions import *
46from .actions import *
47from .results import ParseResults, _ParseResultsWithOffset
48from .unicode import pyparsing_unicode
49
# Alias for sys.maxsize.
# NOTE(review): presumably used elsewhere as an "unbounded" upper limit - confirm.
_MAX_INT = sys.maxsize
# The types treated as string-like: text and bytes.
str_type: Tuple[type, ...] = (str, bytes)
52
53#
54# Copyright (c) 2003-2021  Paul T. McGuire
55#
56# Permission is hereby granted, free of charge, to any person obtaining
57# a copy of this software and associated documentation files (the
58# "Software"), to deal in the Software without restriction, including
59# without limitation the rights to use, copy, modify, merge, publish,
60# distribute, sublicense, and/or sell copies of the Software, and to
61# permit persons to whom the Software is furnished to do so, subject to
62# the following conditions:
63#
64# The above copyright notice and this permission notice shall be
65# included in all copies or substantial portions of the Software.
66#
67# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
68# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
69# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
70# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
71# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
72# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
73# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
74#
75
76
class __compat__(__config_flags):
    """
    A cross-version compatibility configuration for pyparsing features that will be
    released in a future version. By setting values in this configuration to True,
    those features can be enabled in prior versions for compatibility development
    and testing.

    - ``collect_all_And_tokens`` - flag to enable fix for Issue #63 that fixes erroneous grouping
      of results names when an :class:`And` expression is nested within an :class:`Or` or :class:`MatchFirst`;
      maintained for compatibility, but setting to ``False`` no longer restores pre-2.3.1
      behavior
    """

    # label for this group of flags
    # NOTE(review): presumably used by ``__config_flags`` in its messages - confirm
    _type_desc = "compatibility"

    collect_all_And_tokens = True

    # every public (non-underscore) name bound in the class body up to this point;
    # flags must therefore be declared ABOVE this line to be picked up
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # NOTE(review): presumably the flags ``__config_flags`` refuses to change - confirm
    _fixed_names = """
        collect_all_And_tokens
        """.split()
98
99
class __diag__(__config_flags):
    """
    Holder for the global diagnostic flags; every flag defaults to ``False``.

    The flag names match the member names of :class:`Diagnostics`, which also
    documents what each one does.
    """

    # label for this group of flags
    # NOTE(review): presumably used by ``__config_flags`` in its messages - confirm
    _type_desc = "diagnostic"

    warn_multiple_tokens_in_named_alternation = False
    warn_ungrouped_named_tokens_in_collection = False
    warn_name_set_on_empty_Forward = False
    warn_on_parse_using_empty_Forward = False
    warn_on_assignment_to_Forward = False
    warn_on_multiple_string_args_to_oneof = False
    warn_on_match_first_with_lshift_operator = False
    enable_debug_on_named_expressions = False

    # every public (non-underscore) name bound in the class body up to this point;
    # flags must therefore be declared ABOVE this line to be picked up
    _all_names = [__ for __ in locals() if not __.startswith("_")]
    # flags whose name starts with "warn"
    _warning_names = [name for name in _all_names if name.startswith("warn")]
    # flags whose name starts with "enable_debug"
    _debug_names = [name for name in _all_names if name.startswith("enable_debug")]

    @classmethod
    def enable_all_warnings(cls):
        """Call ``cls.enable`` for every flag listed in ``_warning_names``."""
        for name in cls._warning_names:
            cls.enable(name)
120
121
class Diagnostics(Enum):
    """
    Diagnostic configuration (all default to disabled)

    - ``warn_multiple_tokens_in_named_alternation`` - flag to enable warnings when a results
      name is defined on a :class:`MatchFirst` or :class:`Or` expression with one or more :class:`And` subexpressions
    - ``warn_ungrouped_named_tokens_in_collection`` - flag to enable warnings when a results
      name is defined on a containing expression with ungrouped subexpressions that also
      have results names
    - ``warn_name_set_on_empty_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      with a results name, but has no contents defined
    - ``warn_on_parse_using_empty_Forward`` - flag to enable warnings when a :class:`Forward` is
      defined in a grammar but has never had an expression attached to it
    - ``warn_on_assignment_to_Forward`` - flag to enable warnings when a :class:`Forward` is defined
      but is overwritten by assigning using ``'='`` instead of ``'<<='`` or ``'<<'``
    - ``warn_on_multiple_string_args_to_oneof`` - flag to enable warnings when :class:`one_of` is
      incorrectly called with multiple str arguments
    - ``warn_on_match_first_with_lshift_operator`` - NOTE(review): this flag had no description here;
      judging by its name it warns when ``'<<'`` is combined with a :class:`MatchFirst` (``'|'``)
      expression - confirm against the code that checks the flag
    - ``enable_debug_on_named_expressions`` - flag to auto-enable debug on all subsequent
      calls to :class:`ParserElement.set_name`

    Diagnostics are enabled/disabled by calling :func:`enable_diag` and :func:`disable_diag`.
    All warnings can be enabled by calling :func:`enable_all_warnings`.
    """

    # Only the member NAME is used by enable_diag/disable_diag; each name matches
    # the flag attribute of the same name on ``__diag__``.
    warn_multiple_tokens_in_named_alternation = 0
    warn_ungrouped_named_tokens_in_collection = 1
    warn_name_set_on_empty_Forward = 2
    warn_on_parse_using_empty_Forward = 3
    warn_on_assignment_to_Forward = 4
    warn_on_multiple_string_args_to_oneof = 5
    warn_on_match_first_with_lshift_operator = 6
    enable_debug_on_named_expressions = 7
153
154
def enable_diag(diag_enum):
    """
    Switch on one global pyparsing diagnostic.

    ``diag_enum`` is a member of :class:`Diagnostics`; its ``name`` selects
    the flag that gets enabled.
    """
    flag_name = diag_enum.name
    __diag__.enable(flag_name)
160
161
def disable_diag(diag_enum):
    """
    Switch off one global pyparsing diagnostic.

    ``diag_enum`` is a member of :class:`Diagnostics`; its ``name`` selects
    the flag that gets disabled.
    """
    flag_name = diag_enum.name
    __diag__.disable(flag_name)
167
168
def enable_all_warnings():
    """
    Switch on every global pyparsing diagnostic whose name starts with
    ``warn`` (see :class:`Diagnostics`).
    """
    diagnostics = __diag__
    diagnostics.enable_all_warnings()
174
175
# hide abstract class: the base class is only needed to define __compat__ and
# __diag__ above, so remove its name from this module's namespace
del __config_flags
178
179
180def _should_enable_warnings(
181    cmd_line_warn_options: List[str], warn_env_var: OptionalType[str]
182) -> bool:
183    enable = bool(warn_env_var)
184    for warn_opt in cmd_line_warn_options:
185        w_action, w_message, w_category, w_module, w_line = (warn_opt + "::::").split(
186            ":"
187        )[:5]
188        if not w_action.lower().startswith("i") and (
189            not (w_message or w_category or w_module) or w_module == "pyparsing"
190        ):
191            enable = True
192        elif w_action.lower().startswith("i") and w_module in ("pyparsing", ""):
193            enable = False
194    return enable
195
196
# At import time, turn on all pyparsing warnings when the interpreter's warning
# options (sys.warnoptions) or the PYPARSINGENABLEALLWARNINGS environment
# variable ask for it.
if _should_enable_warnings(
    sys.warnoptions, os.environ.get("PYPARSINGENABLEALLWARNINGS")
):
    enable_all_warnings()
201
202
# build the set of single-argument builtins that can be used as parse actions;
# _trim_arity special-cases these and calls them with just the parsed tokens
_single_arg_builtins = {
    sum,
    len,
    sorted,
    reversed,
    list,
    tuple,
    set,
    any,
    all,
    min,
    max,
}
217
_generatorType = types.GeneratorType
# Accepted parse action signatures: fn(), fn(toks), fn(loc, toks) or fn(s, loc, toks)
ParseAction = Union[
    Callable[[], Any],
    Callable[[ParseResults], Any],
    Callable[[int, ParseResults], Any],
    Callable[[str, int, ParseResults], Any],
]
# Same signatures as ParseAction, but the callable returns a bool
ParseCondition = Union[
    Callable[[], bool],
    Callable[[ParseResults], bool],
    Callable[[int, ParseResults], bool],
    Callable[[str, int, ParseResults], bool],
]
# fail action: fn(s, loc, expr, err)
ParseFailAction = Callable[[str, int, "ParserElement", Exception], None]
# debug "start" action: fn(instring, loc, expr, cache_hit)
DebugStartAction = Callable[[str, int, "ParserElement", bool], None]
# debug "success" action: fn(instring, startloc, endloc, expr, toks, cache_hit)
DebugSuccessAction = Callable[
    [str, int, int, "ParserElement", ParseResults, bool], None
]
# debug "exception" action: fn(instring, loc, expr, exc, cache_hit)
DebugExceptionAction = Callable[[str, int, "ParserElement", Exception, bool], None]
237
238
# Convenience character-set strings.
# ASCII letters, upper case first
alphas = string.ascii_uppercase + string.ascii_lowercase
# identifier start / body characters, taken from the Latin1 unicode set
identchars = pyparsing_unicode.Latin1.identchars
identbodychars = pyparsing_unicode.Latin1.identbodychars
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
# every character of string.printable that is not whitespace
printables = "".join(c for c in string.printable if c not in string.whitespace)

# Stack frame summary captured by the first call to _trim_arity and reused by
# later calls; None until then.
_trim_arity_call_line = None
248
249
def _trim_arity(func, maxargs=2):
    """decorator to trim function calls to match the arity of the target

    Returns a wrapper that is called with the full argument list and forwards
    it to ``func``. If that call raises a ``TypeError`` originating at the
    wrapper's own call line (taken to mean an argument-count mismatch), the
    leading argument is dropped and the call retried, until ``func`` succeeds
    or more than ``maxargs`` leading arguments have been dropped. Once a call
    has succeeded, the number of dropped arguments is kept for later calls and
    any further ``TypeError`` is re-raised unchanged.

    Functions listed in ``_single_arg_builtins`` are special-cased: the wrapper
    takes ``(s, l, t)`` and calls ``func(t)``.
    """
    global _trim_arity_call_line

    if func in _single_arg_builtins:
        return lambda s, l, t: func(t)

    # number of leading arguments to drop, and whether that number is confirmed
    limit = 0
    found_arity = False

    def extract_tb(tb, limit=0):
        # one-element list holding (filename, lineno) of the last extracted frame
        frames = traceback.extract_tb(tb, limit=limit)
        frame_summary = frames[-1]
        return [frame_summary[:2]]

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 11
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    _trim_arity_call_line = (
        _trim_arity_call_line or traceback.extract_stack(limit=2)[-1]
    )
    pa_call_line_synth = (
        _trim_arity_call_line[0],
        _trim_arity_call_line[1] + LINE_DIFF,
    )

    def wrapper(*args):
        nonlocal found_arity, limit
        while 1:
            try:
                ret = func(*args[limit:])
                # the call worked, so the current 'limit' is the right one
                found_arity = True
                return ret
            except TypeError as te:
                # re-raise TypeErrors if they did not come from our arity testing
                if found_arity:
                    raise
                else:
                    # compare where the TypeError was raised with the synthesized
                    # (filename, lineno) of the call to func above
                    tb = te.__traceback__
                    trim_arity_type_error = (
                        extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth
                    )
                    del tb

                    if trim_arity_type_error:
                        # drop one more leading argument and try again
                        if limit <= maxargs:
                            limit += 1
                            continue

                    raise

    # copy func name to wrapper for sensible debug output
    # (can't use functools.wraps, since that messes with function signature)
    func_name = getattr(func, "__name__", getattr(func, "__class__").__name__)
    wrapper.__name__ = func_name

    return wrapper
310
311
def condition_as_parse_action(
    fn: ParseCondition, message: str = None, fatal: bool = False
):
    """
    Wrap a predicate so that it can be installed as a parse action.

    The returned parse action calls ``fn`` and raises a parse exception when
    the result is falsy; otherwise it returns ``None``, leaving the tokens
    untouched. Use it where a parse action is required and
    :class:`ParserElement.add_condition` cannot be used (such as when adding a
    condition to an operator level in :class:`infix_notation`).

    Optional keyword arguments:

    - ``message`` - custom message for the raised exception; when omitted,
      ``"failed user-defined condition"`` is used
    - ``fatal`` - if True, raise :class:`ParseFatalException` to stop parsing
      immediately; otherwise raise :class:`ParseException`

    """
    if message is None:
        failure_message = "failed user-defined condition"
    else:
        failure_message = message

    if fatal:
        failure_type = ParseFatalException
    else:
        failure_type = ParseException

    # let the predicate be declared with anywhere from zero to three arguments
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if fn(s, l, t):
            return
        raise failure_type(s, l, failure_message)

    return pa
338
339
def _default_start_debug_action(
    instring: str, loc: int, expr: "ParserElement", cache_hit: bool = False
):
    """
    Default debug action run when a match attempt starts.

    Prints the expression, the location (with line and column numbers), the
    text of the line being parsed, and a caret under the current column. The
    report is prefixed with ``*`` when ``cache_hit`` is true.
    """
    marker = "*" if cache_hit else ""
    report_fields = (
        marker,
        expr,
        loc,
        lineno(loc, instring),
        col(loc, instring),
        line(loc, instring),
        # padding that puts the caret under the current column
        " " * (col(loc, instring) - 1),
    )
    print("{}Match {} at loc {}({},{})\n  {}\n  {}^".format(*report_fields))
357
358
def _default_success_debug_action(
    instring: str,
    startloc: int,
    endloc: int,
    expr: "ParserElement",
    toks: ParseResults,
    cache_hit: bool = False,
):
    """
    Default debug action run after a successful match.

    Prints the expression and the matched tokens as a list, prefixed with
    ``*`` when ``cache_hit`` is true.
    """
    marker = "*" if cache_hit else ""
    matched = toks.as_list()
    print("{}Matched {} -> {}".format(marker, expr, matched))
369
370
371def _default_exception_debug_action(
372    instring: str,
373    loc: int,
374    expr: "ParserElement",
375    exc: Exception,
376    cache_hit: bool = False,
377):
378    cache_hit_str = "*" if cache_hit else ""
379    print(
380        "{}Match {} failed, {} raised: {}".format(
381            cache_hit_str, expr, type(exc).__name__, exc
382        )
383    )
384
385
def null_debug_action(*args):
    """Debug action that ignores its arguments and does nothing; use it to suppress debugging output during parsing."""
    return None
388
389
390class ParserElement(ABC):
391    """Abstract base level parser element class."""
392
393    DEFAULT_WHITE_CHARS: str = " \n\t\r"
394    verbose_stacktrace: bool = False
395    _literalStringClass: OptionalType[type] = None
396
397    @staticmethod
398    def set_default_whitespace_chars(chars: str):
399        r"""
400        Overrides the default whitespace chars
401
402        Example::
403
404            # default whitespace chars are space, <TAB> and newline
405            OneOrMore(Word(alphas)).parse_string("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']
406
407            # change to just treat newline as significant
408            ParserElement.set_default_whitespace_chars(" \t")
409            OneOrMore(Word(alphas)).parse_string("abc def\nghi jkl")  # -> ['abc', 'def']
410        """
411        ParserElement.DEFAULT_WHITE_CHARS = chars
412
413        # update whitespace all parse expressions defined in this module
414        for expr in _builtin_exprs:
415            if expr.copyDefaultWhiteChars:
416                expr.whiteChars = set(chars)
417
418    @staticmethod
419    def inline_literals_using(cls: type):
420        """
421        Set class to be used for inclusion of string literals into a parser.
422
423        Example::
424
425            # default literal class used is Literal
426            integer = Word(nums)
427            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
428
429            date_str.parse_string("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
430
431
432            # change to Suppress
433            ParserElement.inline_literals_using(Suppress)
434            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
435
436            date_str.parse_string("1999/12/31")  # -> ['1999', '12', '31']
437        """
438        ParserElement._literalStringClass = cls
439
440    def __init__(self, savelist: bool = False):
441        self.parseAction: List[ParseAction] = list()
442        self.failAction: OptionalType[ParseFailAction] = None
443        self.customName = None
444        self._defaultName = None
445        self.resultsName = None
446        self.saveAsList = savelist
447        self.skipWhitespace = True
448        self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
449        self.copyDefaultWhiteChars = True
450        # used when checking for left-recursion
451        self.mayReturnEmpty = False
452        self.keepTabs = False
453        self.ignoreExprs: List["ParserElement"] = list()
454        self.debug = False
455        self.streamlined = False
456        # optimize exception handling for subclasses that don't advance parse index
457        self.mayIndexError = True
458        self.errmsg = ""
459        # mark results names as modal (report only last) or cumulative (list all)
460        self.modalResults = True
461        # custom debug actions
462        self.debugActions: Tuple[
463            OptionalType[DebugStartAction],
464            OptionalType[DebugSuccessAction],
465            OptionalType[DebugExceptionAction],
466        ] = (None, None, None)
467        self.re = None
468        # avoid redundant calls to preParse
469        self.callPreparse = True
470        self.callDuringTry = False
471        self.suppress_warnings_ = []
472
473    def suppress_warning(self, warning_type: Diagnostics):
474        """
475        Suppress warnings emitted for a particular diagnostic on this expression.
476
477        Example::
478
479            base = pp.Forward()
480            base.suppress_warning(Diagnostics.warn_on_parse_using_empty_Forward)
481
482            # statement would normally raise a warning, but is now suppressed
483            print(base.parseString("x"))
484
485        """
486        self.suppress_warnings_.append(warning_type)
487        return self
488
489    def copy(self) -> "ParserElement":
490        """
491        Make a copy of this :class:`ParserElement`.  Useful for defining
492        different parse actions for the same parsing pattern, using copies of
493        the original parse element.
494
495        Example::
496
497            integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
498            integerK = integer.copy().add_parse_action(lambda toks: toks[0] * 1024) + Suppress("K")
499            integerM = integer.copy().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
500
501            print(OneOrMore(integerK | integerM | integer).parse_string("5K 100 640K 256M"))
502
503        prints::
504
505            [5120, 100, 655360, 268435456]
506
507        Equivalent form of ``expr.copy()`` is just ``expr()``::
508
509            integerM = integer().add_parse_action(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
510        """
511        cpy = copy.copy(self)
512        cpy.parseAction = self.parseAction[:]
513        cpy.ignoreExprs = self.ignoreExprs[:]
514        if self.copyDefaultWhiteChars:
515            cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
516        return cpy
517
518    def set_results_name(
519        self, name: str, list_all_matches: bool = False, *, listAllMatches: bool = False
520    ) -> "ParserElement":
521        """
522        Define name for referencing matching tokens as a nested attribute
523        of the returned parse results.
524
525        Normally, results names are assigned as you would assign keys in a dict:
526        any existing value is overwritten by later values. If it is necessary to
527        keep all values captured for a particular results name, call ``set_results_name``
528        with ``list_all_matches`` = True.
529
530        NOTE: ``set_results_name`` returns a *copy* of the original :class:`ParserElement` object;
531        this is so that the client can define a basic element, such as an
532        integer, and reference it in multiple places with different names.
533
534        You can also set results names using the abbreviated syntax,
535        ``expr("name")`` in place of ``expr.set_results_name("name")``
536        - see :class:`__call__`. If ``list_all_matches`` is required, use
537        ``expr("name*")``.
538
539        Example::
540
541            date_str = (integer.set_results_name("year") + '/'
542                        + integer.set_results_name("month") + '/'
543                        + integer.set_results_name("day"))
544
545            # equivalent form:
546            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
547        """
548        listAllMatches = listAllMatches or list_all_matches
549        return self._setResultsName(name, listAllMatches)
550
551    def _setResultsName(self, name, listAllMatches=False):
552        if name is None:
553            return self
554        newself = self.copy()
555        if name.endswith("*"):
556            name = name[:-1]
557            listAllMatches = True
558        newself.resultsName = name
559        newself.modalResults = not listAllMatches
560        return newself
561
562    def set_break(self, break_flag: bool = True) -> "ParserElement":
563        """
564        Method to invoke the Python pdb debugger when this element is
565        about to be parsed. Set ``break_flag`` to ``True`` to enable, ``False`` to
566        disable.
567        """
568        if break_flag:
569            _parseMethod = self._parse
570
571            def breaker(instring, loc, doActions=True, callPreParse=True):
572                import pdb
573
574                # this call to pdb.set_trace() is intentional, not a checkin error
575                pdb.set_trace()
576                return _parseMethod(instring, loc, doActions, callPreParse)
577
578            breaker._originalParseMethod = _parseMethod
579            self._parse = breaker
580        else:
581            if hasattr(self._parse, "_originalParseMethod"):
582                self._parse = self._parse._originalParseMethod
583        return self
584
585    def set_parse_action(
586        self, *fns: ParseAction, **kwargs
587    ) -> OptionalType["ParserElement"]:
588        """
589        Define one or more actions to perform when successfully matching parse element definition.
590
591        Parse actions can be called to perform data conversions, do extra validation,
592        update external data structures, or enhance or replace the parsed tokens.
593        Each parse action ``fn`` is a callable method with 0-3 arguments, called as
594        ``fn(s, loc, toks)`` , ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:
595
596        - s   = the original string being parsed (see note below)
597        - loc = the location of the matching substring
598        - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object
599
600        The parsed tokens are passed to the parse action as ParseResults. They can be
601        modified in place using list-style append, extend, and pop operations to update
602        the parsed list elements; and with dictionary-style item set and del operations
603        to add, update, or remove any named results. If the tokens are modified in place,
604        it is not necessary to return them with a return statement.
605
606        Parse actions can also completely replace the given tokens, with another ``ParseResults``
607        object, or with some entirely different object (common for parse actions that perform data
608        conversions). A convenient way to build a new parse result is to define the values
609        using a dict, and then create the return value using :class:`ParseResults.from_dict`.
610
611        If None is passed as the ``fn`` parse action, all previously added parse actions for this
612        expression are cleared.
613
614        Optional keyword arguments:
615
616        - call_during_try = (default= ``False``) indicate if parse action should be run during
617          lookaheads and alternate testing. For parse actions that have side effects, it is
618          important to only call the parse action once it is determined that it is being
619          called as part of a successful parse. For parse actions that perform additional
620          validation, then call_during_try should be passed as True, so that the validation
621          code is included in the preliminary "try" parses.
622
623        Note: the default parsing behavior is to expand tabs in the input string
624        before starting the parsing process.  See :class:`parse_string` for more
625        information on parsing strings containing ``<TAB>`` s, and suggested
626        methods to maintain a consistent view of the parsed string, the parse
627        location, and line and column positions within the parsed string.
628
629        Example::
630
631            # parse dates in the form YYYY/MM/DD
632
633            # use parse action to convert toks from str to int at parse time
634            def convert_to_int(toks):
635                return int(toks[0])
636
637            # use a parse action to verify that the date is a valid date
638            def is_valid_date(toks):
639                from datetime import date
640                year, month, day = toks[::2]
641                try:
642                    date(year, month, day)
643                except ValueError:
644                    raise ParseException("invalid date given")
645
646            integer = Word(nums)
647            date_str = integer + '/' + integer + '/' + integer
648
649            # add parse actions
650            integer.set_parse_action(convert_to_int)
651            date_str.set_parse_action(is_valid_date)
652
653            # note that integer fields are now ints, not strings
654            date_str.run_tests('''
655                # successful parse - note that integer fields were converted to ints
656                1999/12/31
657
658                # fail - invalid date
659                1999/13/31
660                ''')
661        """
662        if list(fns) == [None]:
663            self.parseAction = []
664        else:
665            if not all(callable(fn) for fn in fns):
666                raise TypeError("parse actions must be callable")
667            self.parseAction = list(map(_trim_arity, list(fns)))
668            self.callDuringTry = kwargs.get(
669                "call_during_try", kwargs.get("callDuringTry", False)
670            )
671        return self
672
673    def add_parse_action(self, *fns: ParseAction, **kwargs) -> "ParserElement":
674        """
675        Add one or more parse actions to expression's list of parse actions. See :class:`set_parse_action`.
676
677        See examples in :class:`copy`.
678        """
679        self.parseAction += list(map(_trim_arity, list(fns)))
680        self.callDuringTry = self.callDuringTry or kwargs.get(
681            "call_during_try", kwargs.get("callDuringTry", False)
682        )
683        return self
684
685    def add_condition(self, *fns: ParseCondition, **kwargs) -> "ParserElement":
686        """Add a boolean predicate function to expression's list of parse actions. See
687        :class:`set_parse_action` for function call signatures. Unlike ``set_parse_action``,
688        functions passed to ``add_condition`` need to return boolean success/fail of the condition.
689
690        Optional keyword arguments:
691
692        - message = define a custom message to be used in the raised exception
693        - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise
694          ParseException
695        - call_during_try = boolean to indicate if this method should be called during internal tryParse calls,
696          default=False
697
698        Example::
699
700            integer = Word(nums).set_parse_action(lambda toks: int(toks[0]))
701            year_int = integer.copy()
702            year_int.add_condition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
703            date_str = year_int + '/' + integer + '/' + integer
704
705            result = date_str.parse_string("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0),
706                                                                         (line:1, col:1)
707        """
708        for fn in fns:
709            self.parseAction.append(
710                condition_as_parse_action(
711                    fn, message=kwargs.get("message"), fatal=kwargs.get("fatal", False)
712                )
713            )
714
715        self.callDuringTry = self.callDuringTry or kwargs.get(
716            "call_during_try", kwargs.get("callDuringTry", False)
717        )
718        return self
719
720    def set_fail_action(self, fn: ParseFailAction) -> "ParserElement":
721        """
722        Define action to perform if parsing fails at this expression.
723        Fail acton fn is a callable function that takes the arguments
724        ``fn(s, loc, expr, err)`` where:
725
726        - s = string being parsed
727        - loc = location where expression match was attempted and failed
728        - expr = the parse expression that failed
729        - err = the exception thrown
730
731        The function returns no value.  It may throw :class:`ParseFatalException`
732        if it is desired to stop parsing immediately."""
733        self.failAction = fn
734        return self
735
736    def _skipIgnorables(self, instring, loc):
737        exprsFound = True
738        while exprsFound:
739            exprsFound = False
740            for e in self.ignoreExprs:
741                try:
742                    while 1:
743                        loc, dummy = e._parse(instring, loc)
744                        exprsFound = True
745                except ParseException:
746                    pass
747        return loc
748
749    def preParse(self, instring, loc):
750        if self.ignoreExprs:
751            loc = self._skipIgnorables(instring, loc)
752
753        if self.skipWhitespace:
754            instrlen = len(instring)
755            white_chars = self.whiteChars
756            while loc < instrlen and instring[loc] in white_chars:
757                loc += 1
758
759        return loc
760
761    def parseImpl(self, instring, loc, doActions=True):
762        return loc, []
763
764    def postParse(self, instring, loc, tokenlist):
765        return tokenlist
766
767    # @profile
768    def _parseNoCache(
769        self, instring, loc, doActions=True, callPreParse=True
770    ) -> Tuple[int, ParseResults]:
771        TRY, MATCH, FAIL = 0, 1, 2
772        debugging = self.debug  # and doActions)
773        len_instring = len(instring)
774
775        if debugging or self.failAction:
776            # print("Match {} at loc {}({}, {})".format(self, loc, lineno(loc, instring), col(loc, instring)))
777            try:
778                if callPreParse and self.callPreparse:
779                    pre_loc = self.preParse(instring, loc)
780                else:
781                    pre_loc = loc
782                tokens_start = pre_loc
783                if self.debugActions[TRY]:
784                    self.debugActions[TRY](instring, tokens_start, self)
785                if self.mayIndexError or pre_loc >= len_instring:
786                    try:
787                        loc, tokens = self.parseImpl(instring, pre_loc, doActions)
788                    except IndexError:
789                        raise ParseException(instring, len_instring, self.errmsg, self)
790                else:
791                    loc, tokens = self.parseImpl(instring, pre_loc, doActions)
792            except Exception as err:
793                # print("Exception raised:", err)
794                if self.debugActions[FAIL]:
795                    self.debugActions[FAIL](instring, tokens_start, self, err)
796                if self.failAction:
797                    self.failAction(instring, tokens_start, self, err)
798                raise
799        else:
800            if callPreParse and self.callPreparse:
801                pre_loc = self.preParse(instring, loc)
802            else:
803                pre_loc = loc
804            tokens_start = pre_loc
805            if self.mayIndexError or pre_loc >= len_instring:
806                try:
807                    loc, tokens = self.parseImpl(instring, pre_loc, doActions)
808                except IndexError:
809                    raise ParseException(instring, len_instring, self.errmsg, self)
810            else:
811                loc, tokens = self.parseImpl(instring, pre_loc, doActions)
812
813        tokens = self.postParse(instring, loc, tokens)
814
815        ret_tokens = ParseResults(
816            tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults
817        )
818        if self.parseAction and (doActions or self.callDuringTry):
819            if debugging:
820                try:
821                    for fn in self.parseAction:
822                        try:
823                            tokens = fn(instring, tokens_start, ret_tokens)
824                        except IndexError as parse_action_exc:
825                            exc = ParseException("exception raised in parse action")
826                            raise exc from parse_action_exc
827
828                        if tokens is not None and tokens is not ret_tokens:
829                            ret_tokens = ParseResults(
830                                tokens,
831                                self.resultsName,
832                                asList=self.saveAsList
833                                and isinstance(tokens, (ParseResults, list)),
834                                modal=self.modalResults,
835                            )
836                except Exception as err:
837                    # print "Exception raised in user parse action:", err
838                    if self.debugActions[FAIL]:
839                        self.debugActions[FAIL](instring, tokens_start, self, err)
840                    raise
841            else:
842                for fn in self.parseAction:
843                    try:
844                        tokens = fn(instring, tokens_start, ret_tokens)
845                    except IndexError as parse_action_exc:
846                        exc = ParseException("exception raised in parse action")
847                        raise exc from parse_action_exc
848
849                    if tokens is not None and tokens is not ret_tokens:
850                        ret_tokens = ParseResults(
851                            tokens,
852                            self.resultsName,
853                            asList=self.saveAsList
854                            and isinstance(tokens, (ParseResults, list)),
855                            modal=self.modalResults,
856                        )
857        if debugging:
858            # print("Matched", self, "->", ret_tokens.as_list())
859            if self.debugActions[MATCH]:
860                self.debugActions[MATCH](instring, tokens_start, loc, self, ret_tokens)
861
862        return loc, ret_tokens
863
864    def try_parse(self, instring: str, loc: int, raise_fatal: bool = False) -> int:
865        try:
866            return self._parse(instring, loc, doActions=False)[0]
867        except ParseFatalException:
868            if raise_fatal:
869                raise
870            raise ParseException(instring, loc, self.errmsg, self)
871
872    def can_parse_next(self, instring: str, loc: int) -> bool:
873        try:
874            self.try_parse(instring, loc)
875        except (ParseException, IndexError):
876            return False
877        else:
878            return True
879
    # Memo table for left-recursion handling of Forward references.  It starts out
    # as a plain dict so that reset_cache() can always call .clear() on it;
    # enable_left_recursion() replaces it with a bounded or unbounded memo object.
    # NOTE(review): recursion_lock is not used in this part of the file - presumably
    # it guards recursion_memos inside Forward; confirm there.
    recursion_lock = RLock()
    recursion_memos: DictType[
        Tuple[int, "Forward", bool], Tuple[int, Union[ParseResults, Exception]]
    ] = {}

    # argument cache for optimizing repeated calls when backtracking through recursive expressions
    packrat_cache = (
        {}
    )  # placeholder replaced by enable_packrat(); it exists so that reset_cache() doesn't fail
    # held by _parseCache() for the whole of each cache lookup/update
    packrat_cache_lock = RLock()
    # [hits, misses] counters, incremented by _parseCache() and zeroed by reset_cache()
    packrat_cache_stats = [0, 0]
892
893    # this method gets repeatedly called during backtracking with the same arguments -
894    # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
895    def _parseCache(
896        self, instring, loc, doActions=True, callPreParse=True
897    ) -> Tuple[int, ParseResults]:
898        HIT, MISS = 0, 1
899        TRY, MATCH, FAIL = 0, 1, 2
900        lookup = (self, instring, loc, callPreParse, doActions)
901        with ParserElement.packrat_cache_lock:
902            cache = ParserElement.packrat_cache
903            value = cache.get(lookup)
904            if value is cache.not_in_cache:
905                ParserElement.packrat_cache_stats[MISS] += 1
906                try:
907                    value = self._parseNoCache(instring, loc, doActions, callPreParse)
908                except ParseBaseException as pe:
909                    # cache a copy of the exception, without the traceback
910                    cache.set(lookup, pe.__class__(*pe.args))
911                    raise
912                else:
913                    cache.set(lookup, (value[0], value[1].copy(), loc))
914                    return value
915            else:
916                ParserElement.packrat_cache_stats[HIT] += 1
917                if self.debug and self.debugActions[TRY]:
918                    try:
919                        self.debugActions[TRY](instring, loc, self, cache_hit=True)
920                    except TypeError:
921                        pass
922                if isinstance(value, Exception):
923                    if self.debug and self.debugActions[FAIL]:
924                        try:
925                            self.debugActions[FAIL](
926                                instring, loc, self, value, cache_hit=True
927                            )
928                        except TypeError:
929                            pass
930                    raise value
931
932                loc_, result, endloc = value[0], value[1].copy(), value[2]
933                if self.debug and self.debugActions[MATCH]:
934                    try:
935                        self.debugActions[MATCH](
936                            instring, loc_, endloc, self, result, cache_hit=True
937                        )
938                    except TypeError:
939                        pass
940
941                return loc_, result
942
    # default parse entry point (no memoization); enable_packrat() rebinds this to
    # _parseCache and disable_memoization() restores it
    _parse = _parseNoCache
944
945    @staticmethod
946    def reset_cache() -> None:
947        ParserElement.packrat_cache.clear()
948        ParserElement.packrat_cache_stats[:] = [0] * len(
949            ParserElement.packrat_cache_stats
950        )
951        ParserElement.recursion_memos.clear()
952
    # class-wide flags recording which memoization scheme is active; enable_packrat()
    # and enable_left_recursion() each refuse to run while the other flag is set
    # (unless called with force=True)
    _packratEnabled = False
    _left_recursion_enabled = False
955
956    @staticmethod
957    def disable_memoization() -> None:
958        """
959        Disables active Packrat or Left Recursion parsing and their memoization
960
961        This method also works if neither Packrat nor Left Recursion are enabled.
962        This makes it safe to call before activating Packrat nor Left Recursion
963        to clear any previous settings.
964        """
965        ParserElement.reset_cache()
966        ParserElement._left_recursion_enabled = False
967        ParserElement._packratEnabled = False
968        ParserElement._parse = ParserElement._parseNoCache
969
970    @staticmethod
971    def enable_left_recursion(
972        cache_size_limit: OptionalType[int] = None, *, force=False
973    ) -> None:
974        """
975        Enables "bounded recursion" parsing, which allows for both direct and indirect
976        left-recursion. During parsing, left-recursive :class:`Forward` elements are
977        repeatedly matched with a fixed recursion depth that is gradually increased
978        until finding the longest match.
979
980        Example::
981
982            import pyparsing as pp
983            pp.ParserElement.enable_left_recursion()
984
985            E = pp.Forward("E")
986            num = pp.Word(pp.nums)
987            # match `num`, or `num '+' num`, or `num '+' num '+' num`, ...
988            E <<= E + '+' - num | num
989
990            print(E.parse_string("1+2+3"))
991
992        Recursion search naturally memoizes matches of ``Forward`` elements and may
993        thus skip reevaluation of parse actions during backtracking. This may break
994        programs with parse actions which rely on strict ordering of side-effects.
995
996        Parameters:
997
998        - cache_size_limit - (default=``None``) - memoize at most this many
999          ``Forward`` elements during matching; if ``None`` (the default),
1000          memoize all ``Forward`` elements.
1001
1002        Bounded Recursion parsing works similar but not identical to Packrat parsing,
1003        thus the two cannot be used together. Use ``force=True`` to disable any
1004        previous, conflicting settings.
1005        """
1006        if force:
1007            ParserElement.disable_memoization()
1008        elif ParserElement._packratEnabled:
1009            raise RuntimeError("Packrat and Bounded Recursion are not compatible")
1010        if cache_size_limit is None:
1011            ParserElement.recursion_memos = _UnboundedMemo()
1012        elif cache_size_limit > 0:
1013            ParserElement.recursion_memos = _LRUMemo(capacity=cache_size_limit)
1014        else:
1015            raise NotImplementedError("Memo size of %s" % cache_size_limit)
1016        ParserElement._left_recursion_enabled = True
1017
1018    @staticmethod
1019    def enable_packrat(cache_size_limit: int = 128, *, force: bool = False) -> None:
1020        """
1021        Enables "packrat" parsing, which adds memoizing to the parsing logic.
1022        Repeated parse attempts at the same string location (which happens
1023        often in many complex grammars) can immediately return a cached value,
1024        instead of re-executing parsing/validating code.  Memoizing is done of
1025        both valid results and parsing exceptions.
1026
1027        Parameters:
1028
1029        - cache_size_limit - (default= ``128``) - if an integer value is provided
1030          will limit the size of the packrat cache; if None is passed, then
1031          the cache size will be unbounded; if 0 is passed, the cache will
1032          be effectively disabled.
1033
1034        This speedup may break existing programs that use parse actions that
1035        have side-effects.  For this reason, packrat parsing is disabled when
1036        you first import pyparsing.  To activate the packrat feature, your
1037        program must call the class method :class:`ParserElement.enable_packrat`.
1038        For best results, call ``enable_packrat()`` immediately after
1039        importing pyparsing.
1040
1041        Example::
1042
1043            import pyparsing
1044            pyparsing.ParserElement.enable_packrat()
1045
1046        Packrat parsing works similar but not identical to Bounded Recursion parsing,
1047        thus the two cannot be used together. Use ``force=True`` to disable any
1048        previous, conflicting settings.
1049        """
1050        if force:
1051            ParserElement.disable_memoization()
1052        elif ParserElement._left_recursion_enabled:
1053            raise RuntimeError("Packrat and Bounded Recursion are not compatible")
1054        if not ParserElement._packratEnabled:
1055            ParserElement._packratEnabled = True
1056            if cache_size_limit is None:
1057                ParserElement.packrat_cache = _UnboundedCache()
1058            else:
1059                ParserElement.packrat_cache = _FifoCache(cache_size_limit)
1060            ParserElement._parse = ParserElement._parseCache
1061
1062    def parse_string(
1063        self, instring: str, parse_all: bool = False, *, parseAll: bool = False
1064    ) -> ParseResults:
1065        """
1066        Parse a string with respect to the parser definition. This function is intended as the primary interface to the
1067        client code.
1068
1069        :param instring: The input string to be parsed.
1070        :param parse_all: If set, the entire input string must match the grammar.
1071        :param parseAll: retained for pre-PEP8 compatibility, will be removed in a future release.
1072        :raises ParseException: Raised if ``parse_all`` is set and the input string does not match the whole grammar.
1073        :returns: the parsed data as a :class:`ParseResults` object, which may be accessed as a `list`, a `dict`, or
1074          an object with attributes if the given parser includes results names.
1075
1076        If the input string is required to match the entire grammar, ``parse_all`` flag must be set to ``True``. This
1077        is also equivalent to ending the grammar with :class:`StringEnd`().
1078
1079        To report proper column numbers, ``parse_string`` operates on a copy of the input string where all tabs are
1080        converted to spaces (8 spaces per tab, as per the default in ``string.expandtabs``). If the input string
1081        contains tabs and the grammar uses parse actions that use the ``loc`` argument to index into the string
1082        being parsed, one can ensure a consistent view of the input string by doing one of the following:
1083
1084        - calling ``parse_with_tabs`` on your grammar before calling ``parse_string`` (see :class:`parse_with_tabs`),
1085        - define your parse action using the full ``(s,loc,toks)`` signature, and reference the input string using the
1086          parse action's ``s`` argument, or
1087        - explicitly expand the tabs in your input string before calling ``parse_string``.
1088
1089        Examples:
1090
1091        By default, partial matches are OK.
1092
1093        >>> res = Word('a').parse_string('aaaaabaaa')
1094        >>> print(res)
1095        ['aaaaa']
1096
1097        The parsing behavior varies by the inheriting class of this abstract class. Please refer to the children
1098        directly to see more examples.
1099
1100        It raises an exception if parse_all flag is set and instring does not match the whole grammar.
1101
1102        >>> res = Word('a').parse_string('aaaaabaaa', parse_all=True)
1103        Traceback (most recent call last):
1104        ...
1105        pyparsing.ParseException: Expected end of text, found 'b'  (at char 5), (line:1, col:6)
1106        """
1107        parseAll = parse_all or parseAll
1108
1109        ParserElement.reset_cache()
1110        if not self.streamlined:
1111            self.streamline()
1112        for e in self.ignoreExprs:
1113            e.streamline()
1114        if not self.keepTabs:
1115            instring = instring.expandtabs()
1116        try:
1117            loc, tokens = self._parse(instring, 0)
1118            if parseAll:
1119                loc = self.preParse(instring, loc)
1120                se = Empty() + StringEnd()
1121                se._parse(instring, loc)
1122        except ParseBaseException as exc:
1123            if ParserElement.verbose_stacktrace:
1124                raise
1125            else:
1126                # catch and re-raise exception from here, clearing out pyparsing internal stack trace
1127                raise exc.with_traceback(None)
1128        else:
1129            return tokens
1130
1131    def scan_string(
1132        self,
1133        instring: str,
1134        max_matches: int = _MAX_INT,
1135        overlap: bool = False,
1136        *,
1137        debug: bool = False,
1138        maxMatches: int = _MAX_INT,
1139    ) -> Generator[Tuple[ParseResults, int, int], None, None]:
1140        """
1141        Scan the input string for expression matches.  Each match will return the
1142        matching tokens, start location, and end location.  May be called with optional
1143        ``max_matches`` argument, to clip scanning after 'n' matches are found.  If
1144        ``overlap`` is specified, then overlapping matches will be reported.
1145
1146        Note that the start and end locations are reported relative to the string
1147        being parsed.  See :class:`parse_string` for more information on parsing
1148        strings with embedded tabs.
1149
1150        Example::
1151
1152            source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
1153            print(source)
1154            for tokens, start, end in Word(alphas).scan_string(source):
1155                print(' '*start + '^'*(end-start))
1156                print(' '*start + tokens[0])
1157
1158        prints::
1159
1160            sldjf123lsdjjkf345sldkjf879lkjsfd987
1161            ^^^^^
1162            sldjf
1163                    ^^^^^^^
1164                    lsdjjkf
1165                              ^^^^^^
1166                              sldkjf
1167                                       ^^^^^^
1168                                       lkjsfd
1169        """
1170        maxMatches = min(maxMatches, max_matches)
1171        if not self.streamlined:
1172            self.streamline()
1173        for e in self.ignoreExprs:
1174            e.streamline()
1175
1176        if not self.keepTabs:
1177            instring = str(instring).expandtabs()
1178        instrlen = len(instring)
1179        loc = 0
1180        preparseFn = self.preParse
1181        parseFn = self._parse
1182        ParserElement.resetCache()
1183        matches = 0
1184        try:
1185            while loc <= instrlen and matches < maxMatches:
1186                try:
1187                    preloc = preparseFn(instring, loc)
1188                    nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
1189                except ParseException:
1190                    loc = preloc + 1
1191                else:
1192                    if nextLoc > loc:
1193                        matches += 1
1194                        if debug:
1195                            print(
1196                                {
1197                                    "tokens": tokens.asList(),
1198                                    "start": preloc,
1199                                    "end": nextLoc,
1200                                }
1201                            )
1202                        yield tokens, preloc, nextLoc
1203                        if overlap:
1204                            nextloc = preparseFn(instring, loc)
1205                            if nextloc > loc:
1206                                loc = nextLoc
1207                            else:
1208                                loc += 1
1209                        else:
1210                            loc = nextLoc
1211                    else:
1212                        loc = preloc + 1
1213        except ParseBaseException as exc:
1214            if ParserElement.verbose_stacktrace:
1215                raise
1216            else:
1217                # catch and re-raise exception from here, clears out pyparsing internal stack trace
1218                raise exc.with_traceback(None)
1219
1220    def transform_string(self, instring: str, *, debug: bool = False) -> str:
1221        """
1222        Extension to :class:`scan_string`, to modify matching text with modified tokens that may
1223        be returned from a parse action.  To use ``transform_string``, define a grammar and
1224        attach a parse action to it that modifies the returned token list.
1225        Invoking ``transform_string()`` on a target string will then scan for matches,
1226        and replace the matched text patterns according to the logic in the parse
1227        action.  ``transform_string()`` returns the resulting transformed string.
1228
1229        Example::
1230
1231            wd = Word(alphas)
1232            wd.set_parse_action(lambda toks: toks[0].title())
1233
1234            print(wd.transform_string("now is the winter of our discontent made glorious summer by this sun of york."))
1235
1236        prints::
1237
1238            Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
1239        """
1240        out = []
1241        lastE = 0
1242        # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
1243        # keep string locs straight between transform_string and scan_string
1244        self.keepTabs = True
1245        try:
1246            for t, s, e in self.scan_string(instring, debug=debug):
1247                out.append(instring[lastE:s])
1248                if t:
1249                    if isinstance(t, ParseResults):
1250                        out += t.as_list()
1251                    elif isinstance(t, Iterable) and not isinstance(t, str_type):
1252                        out += list(t)
1253                    else:
1254                        out.append(t)
1255                lastE = e
1256            out.append(instring[lastE:])
1257            out = [o for o in out if o]
1258            return "".join(map(str, _flatten(out)))
1259        except ParseBaseException as exc:
1260            if ParserElement.verbose_stacktrace:
1261                raise
1262            else:
1263                # catch and re-raise exception from here, clears out pyparsing internal stack trace
1264                raise exc.with_traceback(None)
1265
1266    def search_string(
1267        self,
1268        instring: str,
1269        max_matches: int = _MAX_INT,
1270        *,
1271        debug: bool = False,
1272        maxMatches: int = _MAX_INT,
1273    ) -> ParseResults:
1274        """
1275        Another extension to :class:`scan_string`, simplifying the access to the tokens found
1276        to match the given parse expression.  May be called with optional
1277        ``max_matches`` argument, to clip searching after 'n' matches are found.
1278
1279        Example::
1280
1281            # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
1282            cap_word = Word(alphas.upper(), alphas.lower())
1283
1284            print(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity"))
1285
1286            # the sum() builtin can be used to merge results into a single ParseResults object
1287            print(sum(cap_word.search_string("More than Iron, more than Lead, more than Gold I need Electricity")))
1288
1289        prints::
1290
1291            [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
1292            ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
1293        """
1294        maxMatches = min(maxMatches, max_matches)
1295        try:
1296            return ParseResults(
1297                [t for t, s, e in self.scan_string(instring, maxMatches, debug=debug)]
1298            )
1299        except ParseBaseException as exc:
1300            if ParserElement.verbose_stacktrace:
1301                raise
1302            else:
1303                # catch and re-raise exception from here, clears out pyparsing internal stack trace
1304                raise exc.with_traceback(None)
1305
1306    def split(
1307        self,
1308        instring: str,
1309        maxsplit: int = _MAX_INT,
1310        include_separators: bool = False,
1311        *,
1312        includeSeparators=False,
1313    ) -> Generator[str, None, None]:
1314        """
1315        Generator method to split a string using the given expression as a separator.
1316        May be called with optional ``maxsplit`` argument, to limit the number of splits;
1317        and the optional ``include_separators`` argument (default= ``False``), if the separating
1318        matching text should be included in the split results.
1319
1320        Example::
1321
1322            punc = one_of(list(".,;:/-!?"))
1323            print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
1324
1325        prints::
1326
1327            ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
1328        """
1329        includeSeparators = includeSeparators or include_separators
1330        last = 0
1331        for t, s, e in self.scan_string(instring, max_matches=maxsplit):
1332            yield instring[last:s]
1333            if includeSeparators:
1334                yield t[0]
1335            last = e
1336        yield instring[last:]
1337
1338    def __add__(self, other):
1339        """
1340        Implementation of ``+`` operator - returns :class:`And`. Adding strings to a :class:`ParserElement`
1341        converts them to :class:`Literal`s by default.
1342
1343        Example::
1344
1345            greet = Word(alphas) + "," + Word(alphas) + "!"
1346            hello = "Hello, World!"
1347            print(hello, "->", greet.parse_string(hello))
1348
1349        prints::
1350
1351            Hello, World! -> ['Hello', ',', 'World', '!']
1352
1353        ``...`` may be used as a parse expression as a short form of :class:`SkipTo`.
1354
1355            Literal('start') + ... + Literal('end')
1356
1357        is equivalent to:
1358
1359            Literal('start') + SkipTo('end')("_skipped*") + Literal('end')
1360
1361        Note that the skipped text is returned with '_skipped' as a results name,
1362        and to support having multiple skips in the same parser, the value returned is
1363        a list of all skipped text.
1364        """
1365        if other is Ellipsis:
1366            return _PendingSkip(self)
1367
1368        if isinstance(other, str_type):
1369            other = self._literalStringClass(other)
1370        if not isinstance(other, ParserElement):
1371            raise TypeError(
1372                "Cannot combine element of type {} with ParserElement".format(
1373                    type(other).__name__
1374                )
1375            )
1376        return And([self, other])
1377
1378    def __radd__(self, other):
1379        """
1380        Implementation of ``+`` operator when left operand is not a :class:`ParserElement`
1381        """
1382        if other is Ellipsis:
1383            return SkipTo(self)("_skipped*") + self
1384
1385        if isinstance(other, str_type):
1386            other = self._literalStringClass(other)
1387        if not isinstance(other, ParserElement):
1388            raise TypeError(
1389                "Cannot combine element of type {} with ParserElement".format(
1390                    type(other).__name__
1391                )
1392            )
1393        return other + self
1394
1395    def __sub__(self, other):
1396        """
1397        Implementation of ``-`` operator, returns :class:`And` with error stop
1398        """
1399        if isinstance(other, str_type):
1400            other = self._literalStringClass(other)
1401        if not isinstance(other, ParserElement):
1402            raise TypeError(
1403                "Cannot combine element of type {} with ParserElement".format(
1404                    type(other).__name__
1405                )
1406            )
1407        return self + And._ErrorStop() + other
1408
1409    def __rsub__(self, other):
1410        """
1411        Implementation of ``-`` operator when left operand is not a :class:`ParserElement`
1412        """
1413        if isinstance(other, str_type):
1414            other = self._literalStringClass(other)
1415        if not isinstance(other, ParserElement):
1416            raise TypeError(
1417                "Cannot combine element of type {} with ParserElement".format(
1418                    type(other).__name__
1419                )
1420            )
1421        return other - self
1422
1423    def __mul__(self, other):
1424        """
1425        Implementation of ``*`` operator, allows use of ``expr * 3`` in place of
1426        ``expr + expr + expr``.  Expressions may also be multiplied by a 2-integer
1427        tuple, similar to ``{min, max}`` multipliers in regular expressions.  Tuples
1428        may also include ``None`` as in:
1429        - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
1430             to ``expr*n + ZeroOrMore(expr)``
1431             (read as "at least n instances of ``expr``")
1432        - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
1433             (read as "0 to n instances of ``expr``")
1434        - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
1435        - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``
1436
1437        Note that ``expr*(None, n)`` does not raise an exception if
1438        more than n exprs exist in the input stream; that is,
1439        ``expr*(None, n)`` does not enforce a maximum number of expr
1440        occurrences.  If this behavior is desired, then write
1441        ``expr*(None, n) + ~expr``
1442        """
1443        if other is Ellipsis:
1444            other = (0, None)
1445        elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
1446            other = ((0,) + other[1:] + (None,))[:2]
1447
1448        if isinstance(other, int):
1449            minElements, optElements = other, 0
1450        elif isinstance(other, tuple):
1451            other = tuple(o if o is not Ellipsis else None for o in other)
1452            other = (other + (None, None))[:2]
1453            if other[0] is None:
1454                other = (0, other[1])
1455            if isinstance(other[0], int) and other[1] is None:
1456                if other[0] == 0:
1457                    return ZeroOrMore(self)
1458                if other[0] == 1:
1459                    return OneOrMore(self)
1460                else:
1461                    return self * other[0] + ZeroOrMore(self)
1462            elif isinstance(other[0], int) and isinstance(other[1], int):
1463                minElements, optElements = other
1464                optElements -= minElements
1465            else:
1466                raise TypeError(
1467                    "cannot multiply ParserElement and ({}) objects".format(
1468                        ",".join(type(item).__name__ for item in other)
1469                    )
1470                )
1471        else:
1472            raise TypeError(
1473                "cannot multiply ParserElement and {} objects".format(
1474                    type(other).__name__
1475                )
1476            )
1477
1478        if minElements < 0:
1479            raise ValueError("cannot multiply ParserElement by negative value")
1480        if optElements < 0:
1481            raise ValueError(
1482                "second tuple value must be greater or equal to first tuple value"
1483            )
1484        if minElements == optElements == 0:
1485            return And([])
1486
1487        if optElements:
1488
1489            def makeOptionalList(n):
1490                if n > 1:
1491                    return Opt(self + makeOptionalList(n - 1))
1492                else:
1493                    return Opt(self)
1494
1495            if minElements:
1496                if minElements == 1:
1497                    ret = self + makeOptionalList(optElements)
1498                else:
1499                    ret = And([self] * minElements) + makeOptionalList(optElements)
1500            else:
1501                ret = makeOptionalList(optElements)
1502        else:
1503            if minElements == 1:
1504                ret = self
1505            else:
1506                ret = And([self] * minElements)
1507        return ret
1508
    def __rmul__(self, other):
        """
        Implementation of ``*`` operator when left operand is not a :class:`ParserElement`;
        delegates to ``__mul__`` with the same multiplier.
        """
        return self.__mul__(other)
1511
1512    def __or__(self, other):
1513        """
1514        Implementation of ``|`` operator - returns :class:`MatchFirst`
1515        """
1516        if other is Ellipsis:
1517            return _PendingSkip(self, must_skip=True)
1518
1519        if isinstance(other, str_type):
1520            other = self._literalStringClass(other)
1521        if not isinstance(other, ParserElement):
1522            raise TypeError(
1523                "Cannot combine element of type {} with ParserElement".format(
1524                    type(other).__name__
1525                )
1526            )
1527        return MatchFirst([self, other])
1528
1529    def __ror__(self, other):
1530        """
1531        Implementation of ``|`` operator when left operand is not a :class:`ParserElement`
1532        """
1533        if isinstance(other, str_type):
1534            other = self._literalStringClass(other)
1535        if not isinstance(other, ParserElement):
1536            raise TypeError(
1537                "Cannot combine element of type {} with ParserElement".format(
1538                    type(other).__name__
1539                )
1540            )
1541        return other | self
1542
1543    def __xor__(self, other):
1544        """
1545        Implementation of ``^`` operator - returns :class:`Or`
1546        """
1547        if isinstance(other, str_type):
1548            other = self._literalStringClass(other)
1549        if not isinstance(other, ParserElement):
1550            raise TypeError(
1551                "Cannot combine element of type {} with ParserElement".format(
1552                    type(other).__name__
1553                )
1554            )
1555        return Or([self, other])
1556
1557    def __rxor__(self, other):
1558        """
1559        Implementation of ``^`` operator when left operand is not a :class:`ParserElement`
1560        """
1561        if isinstance(other, str_type):
1562            other = self._literalStringClass(other)
1563        if not isinstance(other, ParserElement):
1564            raise TypeError(
1565                "Cannot combine element of type {} with ParserElement".format(
1566                    type(other).__name__
1567                )
1568            )
1569        return other ^ self
1570
1571    def __and__(self, other):
1572        """
1573        Implementation of ``&`` operator - returns :class:`Each`
1574        """
1575        if isinstance(other, str_type):
1576            other = self._literalStringClass(other)
1577        if not isinstance(other, ParserElement):
1578            raise TypeError(
1579                "Cannot combine element of type {} with ParserElement".format(
1580                    type(other).__name__
1581                )
1582            )
1583        return Each([self, other])
1584
1585    def __rand__(self, other):
1586        """
1587        Implementation of ``&`` operator when left operand is not a :class:`ParserElement`
1588        """
1589        if isinstance(other, str_type):
1590            other = self._literalStringClass(other)
1591        if not isinstance(other, ParserElement):
1592            raise TypeError(
1593                "Cannot combine element of type {} with ParserElement".format(
1594                    type(other).__name__
1595                )
1596            )
1597        return other & self
1598
1599    def __invert__(self):
1600        """
1601        Implementation of ``~`` operator - returns :class:`NotAny`
1602        """
1603        return NotAny(self)
1604
    # Setting __iter__ to None disables iteration over a ParserElement; this overrides
    # the legacy behavior of using sequential access to __getitem__ (defined below for
    # repetition syntax) to iterate over an object as if it were a sequence.
    __iter__ = None
1608
1609    def __getitem__(self, key):
1610        """
1611        use ``[]`` indexing notation as a short form for expression repetition:
1612
1613        - ``expr[n]`` is equivalent to ``expr*n``
1614        - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
1615        - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
1616             to ``expr*n + ZeroOrMore(expr)``
1617             (read as "at least n instances of ``expr``")
1618        - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
1619             (read as "0 to n instances of ``expr``")
1620        - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
1621        - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``
1622
1623        ``None`` may be used in place of ``...``.
1624
1625        Note that ``expr[..., n]`` and ``expr[m, n]``do not raise an exception
1626        if more than ``n`` ``expr``s exist in the input stream.  If this behavior is
1627        desired, then write ``expr[..., n] + ~expr``.
1628        """
1629
1630        # convert single arg keys to tuples
1631        try:
1632            if isinstance(key, str_type):
1633                key = (key,)
1634            iter(key)
1635        except TypeError:
1636            key = (key, key)
1637
1638        if len(key) > 2:
1639            raise TypeError(
1640                "only 1 or 2 index arguments supported ({}{})".format(
1641                    key[:5], "... [{}]".format(len(key)) if len(key) > 5 else ""
1642                )
1643            )
1644
1645        # clip to 2 elements
1646        ret = self * tuple(key[:2])
1647        return ret
1648
1649    def __call__(self, name: str = None):
1650        """
1651        Shortcut for :class:`set_results_name`, with ``list_all_matches=False``.
1652
1653        If ``name`` is given with a trailing ``'*'`` character, then ``list_all_matches`` will be
1654        passed as ``True``.
1655
1656        If ``name` is omitted, same as calling :class:`copy`.
1657
1658        Example::
1659
1660            # these are equivalent
1661            userdata = Word(alphas).set_results_name("name") + Word(nums + "-").set_results_name("socsecno")
1662            userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
1663        """
1664        if name is not None:
1665            return self._setResultsName(name)
1666        else:
1667            return self.copy()
1668
1669    def suppress(self) -> "ParserElement":
1670        """
1671        Suppresses the output of this :class:`ParserElement`; useful to keep punctuation from
1672        cluttering up returned output.
1673        """
1674        return Suppress(self)
1675
1676    def ignore_whitespace(self, recursive: bool = True) -> "ParserElement":
1677        """
1678        Enables the skipping of whitespace before matching the characters in the
1679        :class:`ParserElement`'s defined pattern.
1680
1681        :param recursive: If ``True`` (the default), also enable whitespace skipping in child elements (if any)
1682        """
1683        self.skipWhitespace = True
1684        return self
1685
1686    def leave_whitespace(self, recursive: bool = True) -> "ParserElement":
1687        """
1688        Disables the skipping of whitespace before matching the characters in the
1689        :class:`ParserElement`'s defined pattern.  This is normally only used internally by
1690        the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1691
1692        :param recursive: If true (the default), also disable whitespace skipping in child elements (if any)
1693        """
1694        self.skipWhitespace = False
1695        return self
1696
1697    def set_whitespace_chars(
1698        self, chars: Union[Set[str], str], copy_defaults: bool = False
1699    ) -> "ParserElement":
1700        """
1701        Overrides the default whitespace chars
1702        """
1703        self.skipWhitespace = True
1704        self.whiteChars = set(chars)
1705        self.copyDefaultWhiteChars = copy_defaults
1706        return self
1707
1708    def parse_with_tabs(self) -> "ParserElement":
1709        """
1710        Overrides default behavior to expand ``<TAB>`` s to spaces before parsing the input string.
1711        Must be called before ``parse_string`` when the input grammar contains elements that
1712        match ``<TAB>`` characters.
1713        """
1714        self.keepTabs = True
1715        return self
1716
1717    def ignore(self, other: "ParserElement") -> "ParserElement":
1718        """
1719        Define expression to be ignored (e.g., comments) while doing pattern
1720        matching; may be called repeatedly, to define multiple comment or other
1721        ignorable patterns.
1722
1723        Example::
1724
1725            patt = OneOrMore(Word(alphas))
1726            patt.parse_string('ablaj /* comment */ lskjd')
1727            # -> ['ablaj']
1728
1729            patt.ignore(c_style_comment)
1730            patt.parse_string('ablaj /* comment */ lskjd')
1731            # -> ['ablaj', 'lskjd']
1732        """
1733        import typing
1734
1735        if isinstance(other, str_type):
1736            other = Suppress(other)
1737
1738        if isinstance(other, Suppress):
1739            if other not in self.ignoreExprs:
1740                self.ignoreExprs.append(other)
1741        else:
1742            self.ignoreExprs.append(Suppress(other.copy()))
1743        return self
1744
1745    def set_debug_actions(
1746        self,
1747        start_action: DebugStartAction,
1748        success_action: DebugSuccessAction,
1749        exception_action: DebugExceptionAction,
1750    ) -> "ParserElement":
1751        """
1752        Customize display of debugging messages while doing pattern matching:
1753
1754        - ``start_action`` - method to be called when an expression is about to be parsed;
1755          should have the signature ``fn(input_string: str, location: int, expression: ParserElement, cache_hit: bool)``
1756
1757        - ``success_action`` - method to be called when an expression has successfully parsed;
1758          should have the signature ``fn(input_string: str, start_location: int, end_location: int, expression: ParserELement, parsed_tokens: ParseResults, cache_hit: bool)``
1759
1760        - ``exception_action`` - method to be called when expression fails to parse;
1761          should have the signature ``fn(input_string: str, location: int, expression: ParserElement, exception: Exception, cache_hit: bool)``
1762        """
1763        self.debugActions = (
1764            start_action or _default_start_debug_action,
1765            success_action or _default_success_debug_action,
1766            exception_action or _default_exception_debug_action,
1767        )
1768        self.debug = True
1769        return self
1770
1771    def set_debug(self, flag=True) -> "ParserElement":
1772        """
1773        Enable display of debugging messages while doing pattern matching.
1774        Set ``flag`` to ``True`` to enable, ``False`` to disable.
1775
1776        Example::
1777
1778            wd = Word(alphas).set_name("alphaword")
1779            integer = Word(nums).set_name("numword")
1780            term = wd | integer
1781
1782            # turn on debugging for wd
1783            wd.set_debug()
1784
1785            OneOrMore(term).parse_string("abc 123 xyz 890")
1786
1787        prints::
1788
1789            Match alphaword at loc 0(1,1)
1790            Matched alphaword -> ['abc']
1791            Match alphaword at loc 3(1,4)
1792            Exception raised:Expected alphaword (at char 4), (line:1, col:5)
1793            Match alphaword at loc 7(1,8)
1794            Matched alphaword -> ['xyz']
1795            Match alphaword at loc 11(1,12)
1796            Exception raised:Expected alphaword (at char 12), (line:1, col:13)
1797            Match alphaword at loc 15(1,16)
1798            Exception raised:Expected alphaword (at char 15), (line:1, col:16)
1799
1800        The output shown is that produced by the default debug actions - custom debug actions can be
1801        specified using :class:`set_debug_actions`. Prior to attempting
1802        to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
1803        is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
1804        message is shown. Also note the use of :class:`set_name` to assign a human-readable name to the expression,
1805        which makes debugging and exception messages easier to understand - for instance, the default
1806        name created for the :class:`Word` expression without calling ``set_name`` is ``"W:(A-Za-z)"``.
1807        """
1808        if flag:
1809            self.set_debug_actions(
1810                _default_start_debug_action,
1811                _default_success_debug_action,
1812                _default_exception_debug_action,
1813            )
1814        else:
1815            self.debug = False
1816        return self
1817
1818    @property
1819    def default_name(self) -> str:
1820        if self._defaultName is None:
1821            self._defaultName = self._generateDefaultName()
1822        return self._defaultName
1823
    @abstractmethod
    def _generateDefaultName(self):
        """
        Child classes must define this method, which defines how the ``default_name`` is set.

        The value returned here is what the ``default_name`` property caches in
        ``_defaultName`` and hands back when no cached name is present.
        """
1829
1830    def set_name(self, name: str) -> "ParserElement":
1831        """
1832        Define name for this expression, makes debugging and exception messages clearer.
1833        Example::
1834            Word(nums).parse_string("ABC")  # -> Exception: Expected W:(0-9) (at char 0), (line:1, col:1)
1835            Word(nums).set_name("integer").parse_string("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
1836        """
1837        self.customName = name
1838        self.errmsg = "Expected " + self.name
1839        if __diag__.enable_debug_on_named_expressions:
1840            self.set_debug()
1841        return self
1842
1843    @property
1844    def name(self) -> str:
1845        # This will use a user-defined name if available, but otherwise defaults back to the auto-generated name
1846        return self.customName if self.customName is not None else self.default_name
1847
1848    def __str__(self) -> str:
1849        return self.name
1850
1851    def __repr__(self) -> str:
1852        return str(self)
1853
1854    def streamline(self) -> "ParserElement":
1855        self.streamlined = True
1856        self._defaultName = None
1857        return self
1858
1859    def recurse(self):
1860        return []
1861
1862    def _checkRecursion(self, parseElementList):
1863        subRecCheckList = parseElementList[:] + [self]
1864        for e in self.recurse():
1865            e._checkRecursion(subRecCheckList)
1866
1867    def validate(self, validateTrace=None):
1868        """
1869        Check defined expressions for valid structure, check for infinite recursive definitions.
1870        """
1871        self._checkRecursion([])
1872
1873    def parse_file(
1874        self,
1875        file_or_filename: Union[str, Path, TextIO],
1876        encoding: str = "utf-8",
1877        parse_all: bool = False,
1878        *,
1879        parseAll: bool = False,
1880    ) -> ParseResults:
1881        """
1882        Execute the parse expression on the given file or filename.
1883        If a filename is specified (instead of a file object),
1884        the entire file is opened, read, and closed before parsing.
1885        """
1886        parseAll = parseAll or parse_all
1887        try:
1888            file_contents = file_or_filename.read()
1889        except AttributeError:
1890            with open(file_or_filename, "r", encoding=encoding) as f:
1891                file_contents = f.read()
1892        try:
1893            return self.parse_string(file_contents, parseAll)
1894        except ParseBaseException as exc:
1895            if ParserElement.verbose_stacktrace:
1896                raise
1897            else:
1898                # catch and re-raise exception from here, clears out pyparsing internal stack trace
1899                raise exc.with_traceback(None)
1900
1901    def __eq__(self, other):
1902        if self is other:
1903            return True
1904        elif isinstance(other, str_type):
1905            return self.matches(other, parse_all=True)
1906        elif isinstance(other, ParserElement):
1907            return vars(self) == vars(other)
1908        return False
1909
1910    def __hash__(self):
1911        return id(self)
1912
1913    def matches(
1914        self, test_string: str, parse_all: bool = True, *, parseAll: bool = True
1915    ) -> bool:
1916        """
1917        Method for quick testing of a parser against a test string. Good for simple
1918        inline microtests of sub expressions while building up larger parser.
1919
1920        Parameters:
1921        - ``test_string`` - to test against this expression for a match
1922        - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
1923
1924        Example::
1925
1926            expr = Word(nums)
1927            assert expr.matches("100")
1928        """
1929        parseAll = parseAll and parse_all
1930        try:
1931            self.parse_string(str(test_string), parse_all=parseAll)
1932            return True
1933        except ParseBaseException:
1934            return False
1935
1936    def run_tests(
1937        self,
1938        tests: Union[str, List[str]],
1939        parse_all: bool = True,
1940        comment: OptionalType[Union["ParserElement", str]] = "#",
1941        full_dump: bool = True,
1942        print_results: bool = True,
1943        failure_tests: bool = False,
1944        post_parse: Callable[[str, ParseResults], str] = None,
1945        file: OptionalType[TextIO] = None,
1946        with_line_numbers: bool = False,
1947        *,
1948        parseAll: bool = True,
1949        fullDump: bool = True,
1950        printResults: bool = True,
1951        failureTests: bool = False,
1952        postParse: Callable[[str, ParseResults], str] = None,
1953    ):
1954        """
1955        Execute the parse expression on a series of test strings, showing each
1956        test, the parsed results or where the parse failed. Quick and easy way to
1957        run a parse expression against a list of sample strings.
1958
1959        Parameters:
1960        - ``tests`` - a list of separate test strings, or a multiline string of test strings
1961        - ``parse_all`` - (default= ``True``) - flag to pass to :class:`parse_string` when running tests
1962        - ``comment`` - (default= ``'#'``) - expression for indicating embedded comments in the test
1963          string; pass None to disable comment filtering
1964        - ``full_dump`` - (default= ``True``) - dump results as list followed by results names in nested outline;
1965          if False, only dump nested list
1966        - ``print_results`` - (default= ``True``) prints test output to stdout
1967        - ``failure_tests`` - (default= ``False``) indicates if these tests are expected to fail parsing
1968        - ``post_parse`` - (default= ``None``) optional callback for successful parse results; called as
1969          `fn(test_string, parse_results)` and returns a string to be added to the test output
1970        - ``file`` - (default= ``None``) optional file-like object to which test output will be written;
1971          if None, will default to ``sys.stdout``
1972        - ``with_line_numbers`` - default= ``False``) show test strings with line and column numbers
1973
1974        Returns: a (success, results) tuple, where success indicates that all tests succeeded
1975        (or failed if ``failure_tests`` is True), and the results contain a list of lines of each
1976        test's output
1977
1978        Example::
1979
1980            number_expr = pyparsing_common.number.copy()
1981
1982            result = number_expr.run_tests('''
1983                # unsigned integer
1984                100
1985                # negative integer
1986                -100
1987                # float with scientific notation
1988                6.02e23
1989                # integer with scientific notation
1990                1e-12
1991                ''')
1992            print("Success" if result[0] else "Failed!")
1993
1994            result = number_expr.run_tests('''
1995                # stray character
1996                100Z
1997                # missing leading digit before '.'
1998                -.100
1999                # too many '.'
2000                3.14.159
2001                ''', failure_tests=True)
2002            print("Success" if result[0] else "Failed!")
2003
2004        prints::
2005
2006            # unsigned integer
2007            100
2008            [100]
2009
2010            # negative integer
2011            -100
2012            [-100]
2013
2014            # float with scientific notation
2015            6.02e23
2016            [6.02e+23]
2017
2018            # integer with scientific notation
2019            1e-12
2020            [1e-12]
2021
2022            Success
2023
2024            # stray character
2025            100Z
2026               ^
2027            FAIL: Expected end of text (at char 3), (line:1, col:4)
2028
2029            # missing leading digit before '.'
2030            -.100
2031            ^
2032            FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)
2033
2034            # too many '.'
2035            3.14.159
2036                ^
2037            FAIL: Expected end of text (at char 4), (line:1, col:5)
2038
2039            Success
2040
2041        Each test string must be on a single line. If you want to test a string that spans multiple
2042        lines, create a test like this::
2043
2044            expr.run_tests(r"this is a test\\n of strings that spans \\n 3 lines")
2045
2046        (Note that this is a raw string literal, you must include the leading ``'r'``.)
2047        """
2048        from .testing import pyparsing_test
2049
2050        parseAll = parseAll and parse_all
2051        fullDump = fullDump and full_dump
2052        printResults = printResults and print_results
2053        failureTests = failureTests or failure_tests
2054        postParse = postParse or post_parse
2055        if isinstance(tests, str_type):
2056            tests = list(map(type(tests).strip, tests.rstrip().splitlines()))
2057        if isinstance(comment, str_type):
2058            comment = Literal(comment)
2059        if file is None:
2060            file = sys.stdout
2061        print_ = file.write
2062
2063        result: Union[ParseResults, Exception]
2064        allResults = []
2065        comments = []
2066        success = True
2067        NL = Literal(r"\n").add_parse_action(replace_with("\n")).ignore(quoted_string)
2068        BOM = "\ufeff"
2069        for t in tests:
2070            if comment is not None and comment.matches(t, False) or comments and not t:
2071                comments.append(pyparsing_test.with_line_numbers(t))
2072                continue
2073            if not t:
2074                continue
2075            out = [
2076                "\n" + "\n".join(comments) if comments else "",
2077                pyparsing_test.with_line_numbers(t) if with_line_numbers else t,
2078            ]
2079            comments = []
2080            try:
2081                # convert newline marks to actual newlines, and strip leading BOM if present
2082                t = NL.transform_string(t.lstrip(BOM))
2083                result = self.parse_string(t, parse_all=parseAll)
2084            except ParseBaseException as pe:
2085                fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
2086                out.append(pe.explain())
2087                out.append("FAIL: " + str(pe))
2088                if ParserElement.verbose_stacktrace:
2089                    out.extend(traceback.format_tb(pe.__traceback__))
2090                success = success and failureTests
2091                result = pe
2092            except Exception as exc:
2093                out.append("FAIL-EXCEPTION: {}: {}".format(type(exc).__name__, exc))
2094                if ParserElement.verbose_stacktrace:
2095                    out.extend(traceback.format_tb(exc.__traceback__))
2096                success = success and failureTests
2097                result = exc
2098            else:
2099                success = success and not failureTests
2100                if postParse is not None:
2101                    try:
2102                        pp_value = postParse(t, result)
2103                        if pp_value is not None:
2104                            if isinstance(pp_value, ParseResults):
2105                                out.append(pp_value.dump())
2106                            else:
2107                                out.append(str(pp_value))
2108                        else:
2109                            out.append(result.dump())
2110                    except Exception as e:
2111                        out.append(result.dump(full=fullDump))
2112                        out.append(
2113                            "{} failed: {}: {}".format(
2114                                postParse.__name__, type(e).__name__, e
2115                            )
2116                        )
2117                else:
2118                    out.append(result.dump(full=fullDump))
2119            out.append("")
2120
2121            if printResults:
2122                print_("\n".join(out))
2123
2124            allResults.append((t, result))
2125
2126        return success, allResults
2127
2128    def create_diagram(
2129        self,
2130        output_html: Union[TextIO, Path, str],
2131        vertical: int = 3,
2132        show_results_names: bool = False,
2133        **kwargs,
2134    ) -> None:
2135        """
2136        Create a railroad diagram for the parser.
2137
2138        Parameters:
2139        - output_html (str or file-like object) - output target for generated
2140          diagram HTML
2141        - vertical (int) - threshold for formatting multiple alternatives vertically
2142          instead of horizontally (default=3)
2143        - show_results_names - bool flag whether diagram should show annotations for
2144          defined results names
2145
2146        Additional diagram-formatting keyword arguments can also be included;
2147        see railroad.Diagram class.
2148        """
2149
2150        try:
2151            from .diagram import to_railroad, railroad_to_html
2152        except ImportError as ie:
2153            raise Exception(
2154                "must ``pip install pyparsing[diagrams]`` to generate parser railroad diagrams"
2155            ) from ie
2156
2157        self.streamline()
2158
2159        railroad = to_railroad(
2160            self,
2161            vertical=vertical,
2162            show_results_names=show_results_names,
2163            diagram_kwargs=kwargs,
2164        )
2165        if isinstance(output_html, (str, Path)):
2166            with open(output_html, "w", encoding="utf-8") as diag_file:
2167                diag_file.write(railroad_to_html(railroad))
2168        else:
2169            # we were passed a file-like object, just write to it
2170            output_html.write(railroad_to_html(railroad))
2171
    # camelCase names bound to the corresponding snake_case methods and properties of
    # this class, so that each is reachable under either spelling (presumably retained
    # for compatibility with code written against the older camelCase API)
    setDefaultWhitespaceChars = set_default_whitespace_chars
    inlineLiteralsUsing = inline_literals_using
    setResultsName = set_results_name
    setBreak = set_break
    setParseAction = set_parse_action
    addParseAction = add_parse_action
    addCondition = add_condition
    setFailAction = set_fail_action
    tryParse = try_parse
    canParseNext = can_parse_next
    resetCache = reset_cache
    enableLeftRecursion = enable_left_recursion
    enablePackrat = enable_packrat
    parseString = parse_string
    scanString = scan_string
    searchString = search_string
    transformString = transform_string
    setWhitespaceChars = set_whitespace_chars
    parseWithTabs = parse_with_tabs
    setDebugActions = set_debug_actions
    setDebug = set_debug
    defaultName = default_name
    setName = set_name
    parseFile = parse_file
    runTests = run_tests
    ignoreWhitespace = ignore_whitespace
    leaveWhitespace = leave_whitespace
2199
2200
class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element,
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr: ParserElement, must_skip: bool = False):
        super().__init__()
        # expression that the '...' was attached to
        self.anchor = expr
        self.must_skip = must_skip

    def _generateDefaultName(self):
        # render as "<anchor> ..." by naming a throwaway And with Empty and
        # substituting the text "Empty"
        rendered = str(self.anchor + Empty())
        return rendered.replace("Empty", "...")

    def __add__(self, other):
        # the expression being added becomes the SkipTo target; skipped text is
        # collected under the "_skipped" results name
        skipper = SkipTo(other).set_name("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # nothing was skipped: drop the empty token and its results name
            if not t._skipped or t._skipped.as_list() == [""]:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # anchor was absent: replace the empty skip with a "missing" note
            if t._skipped.as_list()[-1:] == [""]:
                t.pop("_skipped")
                t["_skipped"] = "missing <" + repr(self.anchor) + ">"

        anchored = self.anchor + skipper().add_parse_action(must_skip)
        unanchored = skipper().add_parse_action(show_skip)
        return (anchored | unanchored) + other

    def __repr__(self):
        return self.defaultName

    def parseImpl(self, *args):
        # a placeholder must be replaced (via ``+``) before any parsing happens
        raise Exception(
            "use of `...` expression without following SkipTo target expression"
        )
2240
2241
class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, for defining atomic
    matching patterns.
    """

    def __init__(self):
        super().__init__(savelist=False)

    def _generateDefaultName(self):
        # the default name of a token is the name of its class
        cls = type(self)
        return cls.__name__
2252
2253
class Empty(Token):
    """
    An empty token, will always match.
    """

    def __init__(self):
        super().__init__()
        self.mayReturnEmpty, self.mayIndexError = True, False
2263
2264
class NoMatch(Token):
    """
    A token that will never match.
    """

    def __init__(self):
        super().__init__()
        self.mayReturnEmpty, self.mayIndexError = True, False
        self.errmsg = "Unmatchable token"

    def parseImpl(self, instring, loc, doActions=True):
        # unconditionally fail at the current location
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
2278
2279
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::

        Literal('blah').parse_string('blah')  # -> ['blah']
        Literal('blah').parse_string('blahfooblah')  # -> ['blah']
        Literal('blah').parse_string('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        super().__init__()
        # the camelCase keyword argument takes precedence when it is non-empty
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        try:
            self.firstMatchChar = match_string[0]
        except IndexError:
            raise ValueError("null string passed to Literal; use Empty() instead")
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = self.mayIndexError = False

        # Performance tuning: modify __class__ to select
        # a parseImpl optimized for single-character check
        is_plain_literal = type(self) is Literal
        if self.matchLen == 1 and is_plain_literal:
            self.__class__ = _SingleCharLiteral

    def _generateDefaultName(self):
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        # compare the first character before making the full startswith() call
        if instring[loc] != self.firstMatchChar or not instring.startswith(
            self.match, loc
        ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match
2323
2324
class _SingleCharLiteral(Literal):
    # Literal variant whose parseImpl compares only a single character
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match
2330
2331
# Store Literal as the class held in ParserElement._literalStringClass.
# NOTE(review): presumably this is the class ParserElement uses to wrap plain
# strings that are combined with parser elements - confirm in ParserElement.
ParserElement._literalStringClass = Literal
2333
2334
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is,
    the match must not be immediately preceded or followed by a keyword
    (identifier) character.  Compare with :class:`Literal`:

    - ``Literal("if")`` will match the leading ``'if'`` in
      ``'ifAndOnlyIf'``.
    - ``Keyword("if")`` will not; it will only match the leading
      ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

    - ``identChars`` is a string of characters that would be valid
      identifier characters, defaulting to all alphanumerics + "_" and
      "$"
    - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parse_string("start")  # -> ['start']
        Keyword("start").parse_string("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """

    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(
        self,
        match_string: str = "",
        ident_chars: OptionalType[str] = None,
        caseless: bool = False,
        *,
        matchString: str = "",
        identChars: OptionalType[str] = None,
    ):
        """
        :param match_string: the keyword text; must not be empty
        :param ident_chars: characters that count as keyword characters;
            when neither spelling is given, ``Keyword.DEFAULT_KEYWORD_CHARS``
            is used
        :param caseless: compare upper-cased text instead of exact text
        :param matchString: camelCase synonym for ``match_string``; takes
            precedence when non-empty
        :param identChars: camelCase synonym for ``ident_chars``; takes
            precedence when non-empty
        :raises ValueError: if the resulting match string is empty
        """
        super().__init__()
        identChars = identChars or ident_chars
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        match_string = matchString or match_string
        self.match = match_string
        self.matchLen = len(match_string)
        try:
            self.firstMatchChar = match_string[0]
        except IndexError:
            raise ValueError("null string passed to Keyword; use Empty() instead")
        self.errmsg = "Expected {} {}".format(type(self).__name__, self.name)
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done on upper-cased text, so both the
            # keyword and the keyword characters are stored upper-cased
            self.caselessmatch = match_string.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def _generateDefaultName(self):
        """Return the repr of the keyword text as the default name."""
        return repr(self.match)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at ``loc`` and verify both word boundaries.

        Returns ``(loc + matchLen, match)``; the returned token is always the
        keyword as given to the constructor, also in caseless mode.

        Raises :class:`ParseException`:

        - at ``loc`` with the plain message when the text does not match;
        - at ``loc - 1`` when the match is preceded by a keyword character;
        - at ``loc + matchLen`` when it is followed by a keyword character.

        The boundary messages are the same in caseless and case-sensitive
        mode.
        """
        caseless = self.caseless
        ident_chars = self.identChars
        end = loc + self.matchLen

        if caseless:
            found = instring[loc:end].upper() == self.caselessmatch
        else:
            found = (
                instring[loc] == self.firstMatchChar
                and self.matchLen == 1
                or instring.startswith(self.match, loc)
            )

        if not found:
            # no match, just raise the plain exception
            raise ParseException(instring, loc, self.errmsg, self)

        if loc != 0:
            before = instring[loc - 1]
            if caseless:
                before = before.upper()
            if before in ident_chars:
                raise ParseException(
                    instring,
                    loc - 1,
                    self.errmsg
                    + ", keyword was immediately preceded by keyword character",
                    self,
                )

        # a keyword that reaches the end of the input has nothing after it
        if end < len(instring):
            after = instring[end]
            if caseless:
                after = after.upper()
            if after in ident_chars:
                raise ParseException(
                    instring,
                    end,
                    self.errmsg
                    + ", keyword was immediately followed by keyword character",
                    self,
                )

        return end, self.match

    @staticmethod
    def set_default_keyword_chars(chars):
        """
        Overrides the default characters used by :class:`Keyword` expressions.

        Only instances created afterwards without explicit identifier
        characters are affected, since the default is read in ``__init__``.
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

    setDefaultKeywordChars = set_default_keyword_chars
2451
2452
class CaselessLiteral(Literal):
    """
    Token that matches a given string without regard to letter case.
    Note: the token returned is always spelled as in the string given to
    the constructor, NOT as it appears in the input text.

    Example::

        OneOrMore(CaselessLiteral("CMD")).parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, match_string: str = "", *, matchString: str = ""):
        """
        :param match_string: the text to match, in the spelling to be returned
        :param matchString: camelCase synonym for ``match_string``; takes
            precedence when non-empty
        """
        original_text = matchString or match_string
        # the base class stores the upper-cased form as ``self.match``
        super().__init__(original_text.upper())
        # Preserve the defining literal.
        self.returnString = original_text
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the upper-cased input slice with the upper-cased literal."""
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
2478
2479
class CaselessKeyword(Keyword):
    """
    Case-insensitive form of :class:`Keyword`.

    Example::

        OneOrMore(CaselessKeyword("CMD")).parse_string("cmd CMD Cmd10")
        # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(
        self,
        match_string: str = "",
        ident_chars: OptionalType[str] = None,
        *,
        matchString: str = "",
        identChars: OptionalType[str] = None,
    ):
        """
        Resolve the snake_case/camelCase argument pairs (the camelCase value
        wins when it is truthy) and build a :class:`Keyword` with
        ``caseless=True``.
        """
        super().__init__(
            matchString or match_string,
            identChars or ident_chars,
            caseless=True,
        )
2503
2504
class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

    - ``match_string`` - string to be matched
    - ``caseless`` - a boolean indicating whether to ignore casing when comparing characters
    - ``max_mismatches`` - (``default=1``) maximum number of
      mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

    - ``mismatches`` - a list of the positions within the
      match_string where mismatches were found
    - ``original`` - the original match_string used to compare
      against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parse_string("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parse_string("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parse_string("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", max_mismatches=2)
        patt.parse_string("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(
        self,
        match_string: str,
        max_mismatches: OptionalType[int] = None,
        *,
        maxMismatches: int = 1,
        caseless=False,
    ):
        """
        :param match_string: the reference string to compare the input with
        :param max_mismatches: number of differing positions tolerated; when
            ``None`` the value of ``maxMismatches`` is used
        :param maxMismatches: camelCase synonym, used only when
            ``max_mismatches`` is ``None`` (default 1)
        :param caseless: compare characters after lower-casing both sides
        """
        maxMismatches = max_mismatches if max_mismatches is not None else maxMismatches
        super().__init__()
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected {!r} (with up to {} mismatches)".format(
            self.match_string, self.maxMismatches
        )
        self.caseless = caseless
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def _generateDefaultName(self):
        """Return ``"<ClassName>:<repr of match_string>"``."""
        return "{}:{!r}".format(type(self).__name__, self.match_string)

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the input at ``loc`` with ``match_string`` position by position.

        On success returns ``(end, results)`` where ``end`` is
        ``loc + len(match_string)`` and ``results`` holds the matched input
        text plus the named results ``original`` and ``mismatches``.

        Raises :class:`ParseException` at ``loc`` when fewer than
        ``len(match_string)`` characters remain or when more than
        ``maxMismatches`` positions differ.
        """
        start = loc
        maxloc = start + len(self.match_string)

        if maxloc <= len(instring):
            match_string = self.match_string
            max_mismatches = self.maxMismatches
            caseless = self.caseless
            mismatches = []

            for pos, (src, mat) in enumerate(
                zip(instring[start:maxloc], match_string)
            ):
                if caseless:
                    src, mat = src.lower(), mat.lower()

                if src != mat:
                    mismatches.append(pos)
                    if len(mismatches) > max_mismatches:
                        break
            else:
                # The whole window was compared within the mismatch budget.
                # The end is taken from the window bound rather than from the
                # last loop index, so an empty match_string consumes no input.
                results = ParseResults([instring[start:maxloc]])
                results["original"] = match_string
                results["mismatches"] = mismatches
                return maxloc, results

        raise ParseException(instring, loc, self.errmsg, self)
2592
2593
class Word(Token):
    """Token for matching words composed of allowed character sets.
    Parameters:
    - ``init_chars`` - string of all characters that should be used to
      match as a word; "ABC" will match "AAA", "ABAB", "CBAC", etc.;
      if ``body_chars`` is also specified, then this is the string of
      initial characters
    - ``body_chars`` - string of characters that
      can be used for matching after a matched initial character as
      given in ``init_chars``; if omitted, same as the initial characters
      (default=``None``)
    - ``min`` - minimum number of characters to match (default=1)
    - ``max`` - maximum number of characters to match (default=0,
      meaning no upper limit)
    - ``exact`` - exact number of characters to match (default=0,
      meaning not used); when given it overrides ``min`` and ``max``
    - ``as_keyword`` - match as a keyword (default=``False``)
    - ``exclude_chars`` - characters that might be
      found in the input ``body_chars`` string but which should not be
      accepted for matching; useful to define a word of all
      printables except for one or two characters, for instance
      (default=``None``)

    :class:`srange` is useful for defining custom character set strings
    for defining :class:`Word` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

    - :class:`alphas`
    - :class:`nums`
    - :class:`alphanums`
    - :class:`hexnums`
    - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
      - accented, tilded, umlauted, etc.)
    - :class:`punc8bit` (non-alphabetic characters in ASCII range
      128-255 - currency, symbols, superscripts, diacriticals, etc.)
    - :class:`printables` (any non-whitespace character)

    ``alphas``, ``nums``, and ``printables`` are also defined in several
    Unicode sets - see :class:`pyparsing_unicode`.

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, exclude_chars=",")
    """

    def __init__(
        self,
        init_chars: str = "",
        body_chars: OptionalType[str] = None,
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        as_keyword: bool = False,
        exclude_chars: OptionalType[str] = None,
        *,
        initChars: OptionalType[str] = None,
        bodyChars: OptionalType[str] = None,
        asKeyword: bool = False,
        excludeChars: OptionalType[str] = None,
    ):
        """
        See the class docstring for the meaning of the parameters.  Each
        camelCase keyword is a synonym of the snake_case parameter of the
        same meaning and wins when it is truthy.

        :raises ValueError: if no initial characters are given, or if
            ``min`` is less than 1
        """
        initChars = initChars or init_chars
        bodyChars = bodyChars or body_chars
        asKeyword = asKeyword or as_keyword
        excludeChars = excludeChars or exclude_chars
        super().__init__()
        if not initChars:
            raise ValueError(
                "invalid {}, initChars cannot be empty string".format(
                    type(self).__name__
                )
            )

        initChars = set(initChars)
        self.initChars = initChars
        if excludeChars:
            excludeChars = set(excludeChars)
            # in-place difference: self.initChars is the same set object, so
            # the excluded characters are removed from the attribute as well
            initChars -= excludeChars
            if bodyChars:
                bodyChars = set(bodyChars) - excludeChars
        self.initCharsOrig = "".join(sorted(initChars))

        if bodyChars:
            self.bodyCharsOrig = "".join(sorted(bodyChars))
            self.bodyChars = set(bodyChars)
        else:
            # no (remaining) body characters: the body uses the initial set
            self.bodyCharsOrig = "".join(sorted(initChars))
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use Opt(Word()) if zero-length word is permitted"
            )

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # see if we can make a regex for this Word; only attempted when no
        # space is among the characters and the length bounds are min == 1
        # with no exact length
        if " " not in self.initChars | self.bodyChars and (min == 1 and exact == 0):
            if self.bodyChars == self.initChars:
                # one character class repeated for the whole word
                if max == 0:
                    repeat = "+"
                elif max == 1:
                    repeat = ""
                else:
                    repeat = "{{{},{}}}".format(
                        self.minLen, "" if self.maxLen == _MAX_INT else self.maxLen
                    )
                self.reString = "[{}]{}".format(
                    _collapse_string_to_ranges(self.initChars),
                    repeat,
                )
            elif len(self.initChars) == 1:
                # single literal leading character followed by body characters
                if max == 0:
                    repeat = "*"
                else:
                    repeat = "{{0,{}}}".format(max - 1)
                self.reString = "{}[{}]{}".format(
                    re.escape(self.initCharsOrig),
                    _collapse_string_to_ranges(self.bodyChars),
                    repeat,
                )
            else:
                # leading character class followed by body character class
                if max == 0:
                    repeat = "*"
                elif max == 2:
                    # at most one body character, and it must stay optional:
                    # min is 1 here, so the leading character on its own is
                    # a valid word
                    repeat = "?"
                else:
                    repeat = "{{0,{}}}".format(max - 1)
                self.reString = "[{}][{}]{}".format(
                    _collapse_string_to_ranges(self.initChars),
                    _collapse_string_to_ranges(self.bodyChars),
                    repeat,
                )
            if self.asKeyword:
                self.reString = r"\b" + self.reString + r"\b"

            try:
                self.re = re.compile(self.reString)
            except sre_constants.error:
                # keep the character-by-character parseImpl of this class
                self.re = None
            else:
                # switch this instance to the regex-based parseImpl
                self.re_match = self.re.match
                self.__class__ = _WordRegex

    def _generateDefaultName(self):
        """Build the default name ``W:(init[, body])`` plus a length suffix.

        Character sets are collapsed to ranges and truncated to 16 characters.
        A word of exactly one character drops the leading ``W:``.
        """

        def charsAsStr(s):
            max_repr_len = 16
            s = _collapse_string_to_ranges(s, re_escape=False)
            if len(s) > max_repr_len:
                return s[: max_repr_len - 3] + "..."
            else:
                return s

        if self.initChars != self.bodyChars:
            base = "W:({}, {})".format(
                charsAsStr(self.initChars), charsAsStr(self.bodyChars)
            )
        else:
            base = "W:({})".format(charsAsStr(self.initChars))

        # add length specification
        if self.minLen > 1 or self.maxLen != _MAX_INT:
            if self.minLen == self.maxLen:
                if self.minLen == 1:
                    return base[2:]
                else:
                    return base + "{{{}}}".format(self.minLen)
            elif self.maxLen == _MAX_INT:
                return base + "{{{},...}}".format(self.minLen)
            else:
                return base + "{{{},{}}}".format(self.minLen, self.maxLen)
        return base

    def parseImpl(self, instring, loc, doActions=True):
        """Match a word at ``loc`` by scanning character by character.

        Returns ``(end, text)`` for the matched word.  Raises
        :class:`ParseException` when the first character is not an initial
        character, when fewer than ``minLen`` characters match, when a
        maximum was specified and a further body character follows, or, in
        keyword mode, when a body character directly precedes or follows
        the word.
        """
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        # never scan past maxLen characters or the end of the input
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        elif self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            # the word continues beyond the specified maximum length
            throwException = True
        elif self.asKeyword:
            if (
                start > 0
                and instring[start - 1] in bodychars
                or loc < instrlen
                and instring[loc] in bodychars
            ):
                throwException = True

        if throwException:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
2832
2833
class _WordRegex(Word):
    """Variant of :class:`Word` that matches with the precompiled ``self.re``."""

    def parseImpl(self, instring, loc, doActions=True):
        """Apply ``self.re_match`` at ``loc``; return ``(end, matched text)``."""
        found = self.re_match(instring, loc)
        if found:
            return found.end(), found.group()
        raise ParseException(instring, loc, self.errmsg, self)
2842
2843
class Char(_WordRegex):
    """Shortcut for :class:`Word` ``(characters, exact=1)``: matches exactly
    one character taken from a string of allowed characters.
    """

    def __init__(
        self,
        charset: str,
        as_keyword: bool = False,
        exclude_chars: OptionalType[str] = None,
        *,
        asKeyword: bool = False,
        excludeChars: OptionalType[str] = None,
    ):
        """
        :param charset: the characters that may be matched
        :param as_keyword: wrap the pattern in ``\\b`` word-boundary assertions
        :param exclude_chars: characters to remove from ``charset``
        :param asKeyword: camelCase synonym for ``as_keyword``
        :param excludeChars: camelCase synonym for ``exclude_chars``; wins
            when truthy
        """
        keyword_mode = asKeyword or as_keyword
        excluded = excludeChars or exclude_chars
        super().__init__(
            charset, exact=1, asKeyword=keyword_mode, excludeChars=excluded
        )
        # build the single-character-class regex used by _WordRegex.parseImpl
        pattern = "[{}]".format(_collapse_string_to_ranges(self.initChars))
        if keyword_mode:
            pattern = r"\b{}\b".format(pattern)
        self.reString = pattern
        self.re = re.compile(pattern)
        self.re_match = self.re.match
2869
2870
class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python  `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named :class:`ParseResults`.

    If instead of the Python stdlib ``re`` module you wish to use a different RE module
    (such as the ``regex`` module), you can do so by building your ``Regex`` object with
    a compiled RE that was compiled using ``regex``.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # named fields in a regex will be returned as named results
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')

        # the Regex class will accept re's compiled using the regex module
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))
    """

    def __init__(
        self,
        pattern: Any,
        flags: Union[re.RegexFlag, int] = 0,
        as_group_list: bool = False,
        as_match: bool = False,
        *,
        asGroupList: bool = False,
        asMatch: bool = False,
    ):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.

        ``pattern`` may also be an already compiled expression (any object
        with ``pattern`` and ``match`` attributes); it is then used directly
        and ``flags`` is only stored, not applied.

        ``as_group_list`` / ``as_match`` (and their camelCase synonyms) select
        an alternative result form: the tuple of groups, or the match object
        itself.  When both are set, the match-object form is used.

        Raises ``ValueError`` for an empty or invalid pattern string and
        ``TypeError`` for any other kind of ``pattern`` argument.
        """
        super().__init__()
        asGroupList = asGroupList or as_group_list
        asMatch = asMatch or as_match

        if isinstance(pattern, str_type):
            if not pattern:
                raise ValueError("null string passed to Regex; use Empty() instead")

            self.pattern, self.flags = pattern, flags
            try:
                compiled = re.compile(pattern, flags)
            except sre_constants.error:
                raise ValueError(
                    "invalid pattern ({!r}) passed to Regex".format(pattern)
                )
            self.re = compiled
            self.reString = pattern

        elif hasattr(pattern, "pattern") and hasattr(pattern, "match"):
            # precompiled expression: take it as-is
            self.re = pattern
            self.reString = pattern.pattern
            self.pattern = self.reString
            self.flags = flags

        else:
            raise TypeError(
                "Regex may only be constructed with a string or a compiled RE object"
            )

        self.re_match = self.re.match

        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        # the expression may return empty if it matches the empty string
        self.mayReturnEmpty = self.re_match("") is not None
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # select the result form by overriding parseImpl on this instance;
        # asMatch is applied last and therefore wins over asGroupList
        if asGroupList:
            self.parseImpl = self.parseImplAsGroupList
        if asMatch:
            self.parseImpl = self.parseImplAsMatch

    def _generateDefaultName(self):
        """Return ``Re:(<repr of pattern>)`` with doubled backslashes collapsed."""
        return "Re:({})".format(repr(self.pattern).replace("\\\\", "\\"))

    def parseImpl(self, instring, loc, doActions=True):
        """Match at ``loc`` and return the matched text as :class:`ParseResults`,
        with every named group added as a named result.
        """
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        tokens = ParseResults(found.group())
        for group_name, group_text in found.groupdict().items():
            tokens[group_name] = group_text
        return found.end(), tokens

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        """Match at ``loc`` and return the tuple from ``match.groups()``."""
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found.groups()

    def parseImplAsMatch(self, instring, loc, doActions=True):
        """Match at ``loc`` and return the match object itself."""
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found

    def sub(self, repl):
        r"""
        Return :class:`Regex` with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``TypeError`` when this expression was built with
        ``asGroupList=True``, or with ``asMatch=True`` and a callable ``repl``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transform_string("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            raise TypeError("cannot use sub() with Regex(asGroupList=True)")

        if self.asMatch:
            if callable(repl):
                raise TypeError(
                    "cannot use sub() with a callable with Regex(asMatch=True)"
                )

            # the token is a match object: expand the template against it
            def pa(tokens):
                return tokens[0].expand(repl)

        else:

            # the token is the matched text: run the substitution on it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])

        return self.add_parse_action(pa)
3014
3015
3016class QuotedString(Token):
3017    r"""
3018    Token for matching strings that are delimited by quoting characters.
3019
3020    Defined with the following parameters:
3021
3022    - ``quote_char`` - string of one or more characters defining the
3023      quote delimiting string
    - ``esc_char`` - character to escape quotes, typically backslash
      (default= ``None``)
    - ``esc_quote`` - special quote sequence to escape an embedded quote
      string (such as SQL's ``""`` to escape an embedded ``"``)
      (default= ``None``)
3029    - ``multiline`` - boolean indicating whether quotes can span
3030      multiple lines (default= ``False``)
3031    - ``unquote_results`` - boolean indicating whether the matched text
3032      should be unquoted (default= ``True``)
3033    - ``end_quote_char`` - string of one or more characters defining the
3034      end of the quote delimited string (default= ``None``  => same as
3035      quote_char)
3036    - ``convert_whitespace_escapes`` - convert escaped whitespace
3037      (``'\t'``, ``'\n'``, etc.) to actual whitespace
3038      (default= ``True``)
3039
3040    Example::
3041
3042        qs = QuotedString('"')
3043        print(qs.search_string('lsjdf "This is the quote" sldjf'))
3044        complex_qs = QuotedString('{{', end_quote_char='}}')
3045        print(complex_qs.search_string('lsjdf {{This is the "quote"}} sldjf'))
3046        sql_qs = QuotedString('"', esc_quote='""')
3047        print(sql_qs.search_string('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
3048
3049    prints::
3050
3051        [['This is the quote']]
3052        [['This is the "quote"']]
3053        [['This is the quote with "embedded" quotes']]
3054    """
3055    ws_map = ((r"\t", "\t"), (r"\n", "\n"), (r"\f", "\f"), (r"\r", "\r"))
3056
3057    def __init__(
3058        self,
3059        quote_char: str = "",
3060        esc_char: OptionalType[str] = None,
3061        esc_quote: OptionalType[str] = None,
3062        multiline: bool = False,
3063        unquote_results: bool = True,
3064        end_quote_char: OptionalType[str] = None,
3065        convert_whitespace_escapes: bool = True,
3066        *,
3067        quoteChar: str = "",
3068        escChar: OptionalType[str] = None,
3069        escQuote: OptionalType[str] = None,
3070        unquoteResults: bool = True,
3071        endQuoteChar: OptionalType[str] = None,
3072        convertWhitespaceEscapes: bool = True,
3073    ):
3074        super().__init__()
3075        escChar = escChar or esc_char
3076        escQuote = escQuote or esc_quote
3077        unquoteResults = unquoteResults and unquote_results
3078        endQuoteChar = endQuoteChar or end_quote_char
3079        convertWhitespaceEscapes = (
3080            convertWhitespaceEscapes and convert_whitespace_escapes
3081        )
3082        quote_char = quoteChar or quote_char
3083
3084        # remove white space from quote chars - wont work anyway
3085        quote_char = quote_char.strip()
3086        if not quote_char:
3087            raise ValueError("quote_char cannot be the empty string")
3088
3089        if endQuoteChar is None:
3090            endQuoteChar = quote_char
3091        else:
3092            endQuoteChar = endQuoteChar.strip()
3093            if not endQuoteChar:
3094                raise ValueError("endQuoteChar cannot be the empty string")
3095
3096        self.quoteChar = quote_char
3097        self.quoteCharLen = len(quote_char)
3098        self.firstQuoteChar = quote_char[0]
3099        self.endQuoteChar = endQuoteChar
3100        self.endQuoteCharLen = len(endQuoteChar)
3101        self.escChar = escChar
3102        self.escQuote = escQuote
3103        self.unquoteResults = unquoteResults
3104        self.convertWhitespaceEscapes = convertWhitespaceEscapes
3105
3106        sep = ""
3107        inner_pattern = ""
3108
3109        if escQuote:
3110            inner_pattern += r"{}(?:{})".format(sep, re.escape(escQuote))
3111            sep = "|"
3112
3113        if escChar:
3114            inner_pattern += r"{}(?:{}.)".format(sep, re.escape(escChar))
3115            sep = "|"
3116            self.escCharReplacePattern = re.escape(self.escChar) + "(.)"
3117
3118        if len(self.endQuoteChar) > 1:
3119            inner_pattern += (
3120                "{}(?:".format(sep)
3121                + "|".join(
3122                    "(?:{}(?!{}))".format(
3123                        re.escape(self.endQuoteChar[:i]),
3124                        _escape_regex_range_chars(self.endQuoteChar[i:]),
3125                    )
3126                    for i in range(len(self.endQuoteChar) - 1, 0, -1)
3127                )
3128                + ")"
3129            )
3130            sep = "|"
3131
3132        if multiline:
3133            self.flags = re.MULTILINE | re.DOTALL
3134            inner_pattern += r"{}(?:[^{}{}])".format(
3135                sep,
3136                _escape_regex_range_chars(self.endQuoteChar[0]),
3137                (_escape_regex_range_chars(escChar) if escChar is not None else ""),
3138            )
3139        else:
3140            self.flags = 0
3141            inner_pattern += r"{}(?:[^{}\n\r{}])".format(
3142                sep,
3143                _escape_regex_range_chars(self.endQuoteChar[0]),
3144                (_escape_regex_range_chars(escChar) if escChar is not None else ""),
3145            )
3146
3147        self.pattern = "".join(
3148            [
3149                re.escape(self.quoteChar),
3150                "(?:",
3151                inner_pattern,
3152                ")*",
3153                re.escape(self.endQuoteChar),
3154            ]
3155        )
3156
3157        try:
3158            self.re = re.compile(self.pattern, self.flags)
3159            self.reString = self.pattern
3160            self.re_match = self.re.match
3161        except sre_constants.error:
3162            raise ValueError(
3163                "invalid pattern {!r} passed to Regex".format(self.pattern)
3164            )
3165
3166        self.errmsg = "Expected " + self.name
3167        self.mayIndexError = False
3168        self.mayReturnEmpty = True
3169
3170    def _generateDefaultName(self):
3171        if self.quoteChar == self.endQuoteChar and isinstance(self.quoteChar, str_type):
3172            return "string enclosed in {!r}".format(self.quoteChar)
3173
3174        return "quoted string, starting with {} ending with {}".format(
3175            self.quoteChar, self.endQuoteChar
3176        )
3177
3178    def parseImpl(self, instring, loc, doActions=True):
3179        result = (
3180            instring[loc] == self.firstQuoteChar
3181            and self.re_match(instring, loc)
3182            or None
3183        )
3184        if not result:
3185            raise ParseException(instring, loc, self.errmsg, self)
3186
3187        loc = result.end()
3188        ret = result.group()
3189
3190        if self.unquoteResults:
3191
3192            # strip off quotes
3193            ret = ret[self.quoteCharLen : -self.endQuoteCharLen]
3194
3195            if isinstance(ret, str_type):
3196                # replace escaped whitespace
3197                if "\\" in ret and self.convertWhitespaceEscapes:
3198                    for wslit, wschar in self.ws_map:
3199                        ret = ret.replace(wslit, wschar)
3200
3201                # replace escaped characters
3202                if self.escChar:
3203                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)
3204
3205                # replace escaped quotes
3206                if self.escQuote:
3207                    ret = ret.replace(self.escQuote, self.endQuoteChar)
3208
3209        return loc, ret
3210
3211
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given
    set (whitespace is included in the match unless it is listed in the
    exclusion set - see example). Defined with a string containing all
    disallowed characters, and an optional minimum, maximum, and/or
    exact length.  The default value for ``min`` is 1 (a minimum value
    < 1 is not valid); the default values for ``max`` and ``exact`` are
    0, meaning no maximum or exact length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimited_list(csv_value).parse_string("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """

    def __init__(
        self,
        not_chars: str = "",
        min: int = 1,
        max: int = 0,
        exact: int = 0,
        *,
        notChars: str = "",
    ):
        super().__init__()
        self.skipWhitespace = False
        # the positional spelling takes precedence over the camelCase keyword
        self.notChars = not_chars or notChars
        self.notCharsSet = set(self.notChars)

        if min < 1:
            raise ValueError(
                "cannot specify a minimum length < 1; use "
                "Opt(CharsNotIn()) if zero-length char group is permitted"
            )

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = self.minLen == 0
        self.mayIndexError = False

    def _generateDefaultName(self):
        """Return ``!W:(...)``, truncating the excluded characters when long."""
        collapsed = _collapse_string_to_ranges(self.notChars)
        if len(collapsed) <= 16:
            return "!W:({})".format(self.notChars)
        return "!W:({}...)".format(self.notChars[: 16 - 3])

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of non-excluded characters starting at ``loc``."""
        excluded = self.notCharsSet
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min(begin + self.maxLen, len(instring))
        loc = begin + 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
3290
3291
class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """

    # display labels used when building the default name
    whiteStrs = {
        " ": "<SP>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        "\u00A0": "<NBSP>",
        "\u1680": "<OGHAM_SPACE_MARK>",
        "\u180E": "<MONGOLIAN_VOWEL_SEPARATOR>",
        "\u2000": "<EN_QUAD>",
        "\u2001": "<EM_QUAD>",
        "\u2002": "<EN_SPACE>",
        "\u2003": "<EM_SPACE>",
        "\u2004": "<THREE-PER-EM_SPACE>",
        "\u2005": "<FOUR-PER-EM_SPACE>",
        "\u2006": "<SIX-PER-EM_SPACE>",
        "\u2007": "<FIGURE_SPACE>",
        "\u2008": "<PUNCTUATION_SPACE>",
        "\u2009": "<THIN_SPACE>",
        "\u200A": "<HAIR_SPACE>",
        "\u200B": "<ZERO_WIDTH_SPACE>",
        "\u202F": "<NNBSP>",
        "\u205F": "<MMSP>",
        "\u3000": "<IDEOGRAPHIC_SPACE>",
    }

    def __init__(self, ws: str = " \t\r\n", min: int = 1, max: int = 0, exact: int = 0):
        super().__init__()
        self.matchWhite = ws
        # only skip the whitespace characters that this element does not match itself
        skippable = [c for c in self.whiteChars if c not in self.matchWhite]
        self.set_whitespace_chars("".join(skippable), copy_defaults=True)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def _generateDefaultName(self):
        """Concatenate the ``whiteStrs`` label of every matched character."""
        labels = White.whiteStrs
        return "".join([labels[c] for c in self.matchWhite])

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of the configured whitespace characters at ``loc``."""
        allowed = self.matchWhite
        if instring[loc] not in allowed:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        stop = min(begin + self.maxLen, len(instring))
        loc += 1
        while loc < stop and instring[loc] in allowed:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
3367
3368
class PositionToken(Token):
    """Token base class whose instances start with ``mayReturnEmpty`` set
    and ``mayIndexError`` cleared.
    """

    def __init__(self):
        super().__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
3374
3375
class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """

    def __init__(self, colno: int):
        super().__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorables and whitespace until the target column or a non-space."""
        if col(loc, instring) == self.col:
            return loc

        text_len = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while (
            loc < text_len
            and instring[loc].isspace()
            and col(loc, instring) != self.col
        ):
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between ``loc`` and the target column."""
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target = loc + self.col - current
        return target, instring[loc:target]
3405
3406
class LineStart(PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self):
        super().__init__()
        self.leave_whitespace()
        # remember the whitespace set as it was before "\n" is removed below
        self.orig_whiteChars = set(self.whiteChars)
        self.whiteChars.discard("\n")
        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
        self.errmsg = "Expected start of line"

    def preParse(self, instring, loc):
        """Skip non-newline whitespace, stepping over newlines one at a time."""
        if loc == 0:
            return loc

        pos = self.skipper.preParse(instring, loc)
        if "\n" not in self.orig_whiteChars:
            return pos
        while instring[pos : pos + 1] == "\n":
            pos = self.skipper.preParse(instring, pos + 1)
        return pos

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens only when ``loc`` is in column 1."""
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3452
3453
class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """

    def __init__(self):
        super().__init__()
        # a newline must not be skipped as whitespace, since it is what gets matched
        self.whiteChars.discard("\n")
        self.set_whitespace_chars(self.whiteChars, copy_defaults=False)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a newline at ``loc``, or the position just past the last character."""
        text_len = len(instring)
        if loc == text_len:
            return loc + 1, []
        if loc < text_len and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
3475
3476
class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed at offset 0, or where pre-parsing from offset 0 ends up."""
        # a non-zero loc qualifies only if everything before it is whitespace/ignorables
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3492
3493
class StringEnd(PositionToken):
    """
    Matches if current position is at the end of the parse string
    """

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when ``loc`` is at or past the end of ``instring``.

        Returns ``(loc + 1, [])`` when ``loc`` is exactly the end of the
        string and ``(loc, [])`` when it is already beyond it.

        Raises ParseException when input remains at ``loc``.
        """
        text_len = len(instring)
        if loc < text_len:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == text_len:
            return loc + 1, []
        # loc > text_len: the three comparisons are exhaustive, so the former
        # trailing "else: raise" branch could never run and has been removed
        return loc, []
3512
3513
class WordStart(PositionToken):
    r"""Matches if the current position is at the beginning of a
    :class:`Word`, and is not preceded by any character in a given
    set of ``word_chars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # ``wordChars`` is the alternate keyword spelling; it takes effect only
        # when it was explicitly given a non-default value, otherwise
        # ``word_chars`` is used.  (The test used to be inverted, so a
        # positional ``word_chars`` argument was silently ignored.)
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when a word begins at ``loc``.

        Offset 0 always matches.  Elsewhere the previous character must not be
        a word character and the character at ``loc`` must be one; otherwise
        ParseException is raised.
        """
        if loc != 0:
            if (
                instring[loc - 1] in self.wordChars
                or instring[loc] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3538
3539
class WordEnd(PositionToken):
    r"""Matches if the current position is at the end of a :class:`Word`,
    and is not followed by any character in a given set of ``word_chars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """

    def __init__(self, word_chars: str = printables, *, wordChars: str = printables):
        # ``wordChars`` is the alternate keyword spelling; it takes effect only
        # when it was explicitly given a non-default value, otherwise
        # ``word_chars`` is used.  (The test used to be inverted, so a
        # positional ``word_chars`` argument was silently ignored.)
        wordChars = word_chars if wordChars == printables else wordChars
        super().__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when a word ends at ``loc``.

        Any position at or past the end of the input matches.  Elsewhere the
        character at ``loc`` must not be a word character and the previous
        character must be one; otherwise ParseException is raised.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (
                instring[loc] in self.wordChars
                # at offset 0 there is no preceding character; without this
                # guard instring[loc - 1] would wrap around to the last
                # character of the input
                or loc == 0
                or instring[loc - 1] not in self.wordChars
            ):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3565
3566
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, exprs: IterableType[ParserElement], savelist: bool = False):
        super().__init__(savelist)
        self.exprs: List[ParserElement]
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, str_type):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # wrap any bare strings in the sequence with the literal class
            items = list(exprs)
            self.exprs = [
                self._literalStringClass(item) if isinstance(item, str_type) else item
                for item in items
            ]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                self.exprs = [exprs]
        self.callPreparse = False

    def recurse(self):
        """Return a shallow copy of the contained expressions."""
        return list(self.exprs)

    def append(self, other):
        """Add ``other`` to the contained expressions and reset the cached default name."""
        self.exprs.append(other)
        self._defaultName = None
        return self

    def leave_whitespace(self, recursive=True):
        """
        Extends ``leave_whitespace`` defined in base class, and also invokes ``leave_whitespace`` on
           all contained expressions.
        """
        super().leave_whitespace(recursive)
        if not recursive:
            return self

        # operate on copies so the originals keep their own whitespace handling
        self.exprs = [e.copy() for e in self.exprs]
        for e in self.exprs:
            e.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive=True):
        """
        Extends ``ignore_whitespace`` defined in base class, and also invokes ``ignore_whitespace`` on
           all contained expressions.
        """
        super().ignore_whitespace(recursive)
        if not recursive:
            return self

        # operate on copies so the originals keep their own whitespace handling
        self.exprs = [e.copy() for e in self.exprs]
        for e in self.exprs:
            e.ignore_whitespace(recursive)
        return self

    def ignore(self, other):
        """Register ``other`` as ignorable here and on every contained expression."""
        # a Suppress that is already registered is not added a second time
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super().ignore(other)
        for e in self.exprs:
            e.ignore(self.ignoreExprs[-1])
        return self

    def _generateDefaultName(self):
        return "{}:({})".format(self.__class__.__name__, str(self.exprs))

    def streamline(self):
        """Streamline contained expressions and flatten same-class nesting."""
        if self.streamlined:
            return self

        super().streamline()

        for e in self.exprs:
            e.streamline()

        def can_absorb(candidate):
            # nested element of our own class with no parse actions, results name or debug
            return (
                isinstance(candidate, self.__class__)
                and not candidate.parseAction
                and candidate.resultsName is None
                and not candidate.debug
            )

        # collapse nested :class:`And`'s of the form ``And(And(And(a, b), c), d)`` to ``And(a, b, c, d)``
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for :class:`Or`'s and :class:`MatchFirst`'s)
        if len(self.exprs) == 2:
            head = self.exprs[0]
            if can_absorb(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self._defaultName = None
                self.mayReturnEmpty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            tail = self.exprs[-1]
            if can_absorb(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self._defaultName = None
                self.mayReturnEmpty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        self.errmsg = "Expected " + str(self)

        return self

    def validate(self, validateTrace=None):
        """Validate every contained expression, then check this one for recursion."""
        trace = [] if validateTrace is None else validateTrace
        tmp = trace[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self._checkRecursion([])

    def copy(self):
        """Copy this expression together with copies of its contained expressions."""
        duplicate = super().copy()
        duplicate.exprs = [e.copy() for e in self.exprs]
        return duplicate

    def _setResultsName(self, name, listAllMatches=False):
        """Warn about already-named contained expressions, then defer to the base class."""
        flag = Diagnostics.warn_ungrouped_named_tokens_in_collection
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and flag not in self.suppress_warnings_
        ):
            for e in self.exprs:
                if not isinstance(e, ParserElement) or not e.resultsName:
                    continue
                if flag in e.suppress_warnings_:
                    continue
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "collides with {!r} on contained expression".format(
                        "warn_ungrouped_named_tokens_in_collection",
                        name,
                        type(self).__name__,
                        e.resultsName,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)

    ignoreWhitespace = ignore_whitespace
    leaveWhitespace = leave_whitespace
3726
3727
class And(ParseExpression):
    """
    Requires all given :class:`ParseExpression` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element (displayed as ``-``): once ``And.parseImpl`` passes
        it, failures in the following expressions are raised as
        ParseSyntaxException.
        """

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.leave_whitespace()

        def _generateDefaultName(self):
            return "-"

    def __init__(self, exprs_arg: IterableType[ParserElement], savelist: bool = True):
        """Build the sequence, replacing each ``...`` placeholder with a SkipTo.

        Raises Exception when ``...`` is the last item, since there is no
        following expression to skip to.
        """
        exprs: List[ParserElement] = list(exprs_arg)
        if exprs and Ellipsis in exprs:
            tmp = []
            for i, expr in enumerate(exprs):
                if expr is Ellipsis:
                    if i < len(exprs) - 1:
                        # let "+" convert the following item, then skip up to it
                        skipto_arg: ParserElement = (Empty() + exprs[i + 1]).exprs[-1]
                        tmp.append(SkipTo(skipto_arg)("_skipped*"))
                    else:
                        raise Exception(
                            "cannot construct And with sequence ending in ..."
                        )
                else:
                    tmp.append(expr)
            exprs[:] = tmp
        super().__init__(exprs, savelist)
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
            # whitespace handling follows that of the first expression
            self.set_whitespace_chars(
                self.exprs[0].whiteChars,
                copy_defaults=self.exprs[0].copyDefaultWhiteChars,
            )
            self.skipWhitespace = self.exprs[0].skipWhitespace
        else:
            self.mayReturnEmpty = True
        self.callPreparse = True

    def streamline(self) -> ParserElement:
        """Resolve pending skips, streamline, and link IndentedBlocks to their predecessor."""
        # collapse any _PendingSkip's
        if self.exprs:
            if any(
                isinstance(e, ParseExpression)
                and e.exprs
                and isinstance(e.exprs[-1], _PendingSkip)
                for e in self.exprs[:-1]
            ):
                for i, e in enumerate(self.exprs[:-1]):
                    if e is None:
                        continue
                    if (
                        isinstance(e, ParseExpression)
                        and e.exprs
                        and isinstance(e.exprs[-1], _PendingSkip)
                    ):
                        # fold the following expression into the pending skip
                        e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                        self.exprs[i + 1] = None
                self.exprs = [e for e in self.exprs if e is not None]

        super().streamline()

        # link any IndentedBlocks to the prior expression
        for prev, cur in zip(self.exprs, self.exprs[1:]):
            # traverse cur or any first embedded expr of cur looking for an IndentedBlock
            # (but watch out for recursive grammar)
            seen = set()
            while cur:
                if id(cur) in seen:
                    break
                seen.add(id(cur))
                if isinstance(cur, IndentedBlock):
                    # bind the block as a default argument: ``cur`` keeps being
                    # reassigned by these loops, so a plain closure would act
                    # on whatever ``cur`` holds when the action finally runs
                    prev.add_parse_action(
                        lambda s, l, t, cur_=cur: setattr(
                            cur_, "parent_anchor", col(l, s)
                        )
                    )
                    break
                subs = cur.recurse()
                cur = next(iter(subs), None)

        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Parse every contained expression in order and return ``(loc, tokens)``.

        After an ``_ErrorStop`` has been passed, a ParseBaseException or
        IndexError from a later expression is raised as ParseSyntaxException.
        """
        # pass False as callPreParse arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(
            instring, loc, doActions, callPreParse=False
        )
        errorStop = False
        for e in self.exprs[1:]:
            # exact type test rather than isinstance
            if type(e) is And._ErrorStop:
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(
                        instring, len(instring), self.errmsg, self
                    )
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            # keep results that have list items or named keys
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Append ``other`` in place, wrapping a bare string with the literal class."""
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        return self.append(other)  # And([self, other])

    def _checkRecursion(self, parseElementList):
        """Check contained expressions up to the first that cannot match empty."""
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e._checkRecursion(subRecCheckList)
            if not e.mayReturnEmpty:
                break

    def _generateDefaultName(self):
        """Return the space-joined contained names inside a single pair of braces."""
        inner = " ".join(str(e) for e in self.exprs)
        # strip off redundant inner {}'s
        # (the stride slice picks exactly the first and the last character)
        while len(inner) > 1 and inner[0 :: len(inner) - 1] == "{}":
            inner = inner[1:-1]
        return "{" + inner + "}"
3873
3874
3875class Or(ParseExpression):
3876    """Requires that at least one :class:`ParseExpression` is found. If
3877    two expressions match, the expression that matches the longest
3878    string will be used. May be constructed using the ``'^'``
3879    operator.
3880
3881    Example::
3882
3883        # construct Or using '^' operator
3884
3885        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
3886        print(number.search_string("123 3.1416 789"))
3887
3888    prints::
3889
3890        [['123'], ['3.1416'], ['789']]
3891    """
3892
3893    def __init__(self, exprs: IterableType[ParserElement], savelist: bool = False):
3894        super().__init__(exprs, savelist)
3895        if self.exprs:
3896            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
3897            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
3898        else:
3899            self.mayReturnEmpty = True
3900
3901    def streamline(self) -> ParserElement:
3902        super().streamline()
3903        if self.exprs:
3904            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
3905            self.saveAsList = any(e.saveAsList for e in self.exprs)
3906            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
3907        else:
3908            self.saveAsList = False
3909        return self
3910
    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative at ``loc`` and return the result of the longest match.

        Each alternative is first probed with ``try_parse``; the successful
        ones are then re-parsed with ``_parse``, longest probe first.

        Raises the furthest-advanced ParseFatalException if any alternative
        raised one, otherwise the furthest-advanced ParseException (with its
        message replaced by this element's ``errmsg``), or a new
        ParseException when no exception was recorded at all.
        """
        # furthest non-fatal failure seen so far, and its location
        maxExcLoc = -1
        maxException = None
        # (end location, expression) for every alternative whose probe succeeded
        matches = []
        # fatal exceptions raised by alternatives during probing
        fatals = []
        if all(e.callPreparse for e in self.exprs):
            loc = self.preParse(instring, loc)
        for e in self.exprs:
            try:
                loc2 = e.try_parse(instring, loc, raise_fatal=True)
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parserElement = e
                fatals.append(pfe)
                # a fatal error discards any non-fatal failure recorded so far
                maxException = None
                maxExcLoc = -1
            except ParseException as err:
                # non-fatal failures are only tracked while no fatal one has been seen
                if not fatals:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
            except IndexError:
                # report an IndexError as a failure at the end of the input
                if len(instring) > maxExcLoc:
                    maxException = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    maxExcLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                matches.append((loc2, e))

        if matches:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            matches.sort(key=itemgetter(0), reverse=True)

            if not doActions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = matches[0][1]
                return best_expr._parse(instring, loc, doActions)

            # best (end location, tokens) obtained from a re-parse that came up short
            longest = -1, None
            for loc1, expr1 in matches:
                if loc1 <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    loc2, toks = expr1._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > maxExcLoc:
                        maxException = err
                        maxExcLoc = err.loc
                else:
                    # the re-parse reached at least as far as the probe did: accept it
                    if loc2 >= loc1:
                        return loc2, toks
                    # didn't match as much as before
                    elif loc2 > longest[0]:
                        longest = loc2, toks

            if longest != (-1, None):
                return longest

        if fatals:
            if len(fatals) > 1:
                # furthest location first; on a tie between the top two, re-sort so
                # that the longer parserElement string comes first
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parserElement))))
            max_fatal = fatals[0]
            raise max_fatal

        if maxException is not None:
            # re-raise the furthest failure under this element's own message
            maxException.msg = self.errmsg
            raise maxException
        else:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )
3992
3993    def __ixor__(self, other):
3994        if isinstance(other, str_type):
3995            other = self._literalStringClass(other)
3996        return self.append(other)  # Or([self, other])
3997
3998    def _generateDefaultName(self):
3999        return "{" + " ^ ".join(str(e) for e in self.exprs) + "}"
4000
4001    def _setResultsName(self, name, listAllMatches=False):
4002        if (
4003            __diag__.warn_multiple_tokens_in_named_alternation
4004            and Diagnostics.warn_multiple_tokens_in_named_alternation
4005            not in self.suppress_warnings_
4006        ):
4007            if any(
4008                isinstance(e, And)
4009                and Diagnostics.warn_multiple_tokens_in_named_alternation
4010                not in e.suppress_warnings_
4011                for e in self.exprs
4012            ):
4013                warnings.warn(
4014                    "{}: setting results name {!r} on {} expression "
4015                    "will return a list of all parsed tokens in an And alternative, "
4016                    "in prior versions only the first token was returned; enclose"
4017                    "contained argument in Group".format(
4018                        "warn_multiple_tokens_in_named_alternation",
4019                        name,
4020                        type(self).__name__,
4021                    ),
4022                    stacklevel=3,
4023                )
4024
4025        return super()._setResultsName(name, listAllMatches)
4026
4027
class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    more than one expression matches, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.search_string("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.search_string("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """

    def __init__(self, exprs: IterableType[ParserElement], savelist: bool = False):
        """Store the alternatives and derive the combined matching flags.

        With at least one alternative, ``mayReturnEmpty`` is true if any
        alternative may return empty and ``skipWhitespace`` is true only if
        every alternative skips whitespace.  With no alternatives,
        ``mayReturnEmpty`` is set to ``True``.
        """
        super().__init__(exprs, savelist)
        if self.exprs:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
        else:
            self.mayReturnEmpty = True

    def streamline(self) -> ParserElement:
        """Streamline via the superclass, then recompute the derived flags.

        Does nothing if this expression is already marked as streamlined.
        Returns ``self``.
        """
        if self.streamlined:
            return self

        super().streamline()
        if self.exprs:
            self.saveAsList = any(e.saveAsList for e in self.exprs)
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
        else:
            self.saveAsList = False
            self.mayReturnEmpty = True
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses at ``loc``.

        A ``ParseFatalException`` from an alternative is tagged with that
        alternative and re-raised immediately.  If every alternative fails,
        the ``ParseException`` with the greatest ``loc`` is re-raised with its
        message replaced by this expression's ``errmsg``; if there was no
        failure to report (no alternatives), a ``ParseException`` stating that
        there are no defined alternatives is raised.
        """
        maxExcLoc = -1
        maxException = None

        for e in self.exprs:
            try:
                return e._parse(
                    instring,
                    loc,
                    doActions,
                )
            except ParseFatalException as pfe:
                pfe.__traceback__ = None
                pfe.parserElement = e
                raise
            except ParseException as err:
                # remember only the failure that got furthest into the input
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                # an IndexError is reported as a failure at the end of the input
                if len(instring) > maxExcLoc:
                    maxException = ParseException(
                        instring, len(instring), e.errmsg, self
                    )
                    maxExcLoc = len(instring)

        if maxException is not None:
            maxException.msg = self.errmsg
            raise maxException
        else:
            raise ParseException(
                instring, loc, "no defined alternatives to match", self
            )

    def __ior__(self, other):
        """Implement ``self |= other`` by appending ``other`` as an alternative.

        A plain string is first wrapped with the literal-string class.
        """
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        return self.append(other)  # MatchFirst([self, other])

    def _generateDefaultName(self):
        """Return the default name: the alternatives joined by `` | `` in braces."""
        return "{" + " | ".join(str(e) for e in self.exprs) + "}"

    def _setResultsName(self, name, listAllMatches=False):
        """Assign a results name, warning about ``And`` alternatives first.

        The warning is issued only when the
        ``warn_multiple_tokens_in_named_alternation`` diagnostic is enabled in
        ``__diag__``, is not suppressed on this expression, and at least one
        alternative is an ``And`` that has not suppressed it either.  The name
        is then applied by the superclass implementation.
        """
        if (
            __diag__.warn_multiple_tokens_in_named_alternation
            and Diagnostics.warn_multiple_tokens_in_named_alternation
            not in self.suppress_warnings_
        ):
            if any(
                isinstance(e, And)
                and Diagnostics.warn_multiple_tokens_in_named_alternation
                not in e.suppress_warnings_
                for e in self.exprs
            ):
                # the message is built from adjacent string literals, so each
                # fragment must carry its own trailing space
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "will return a list of all parsed tokens in an And alternative, "
                    "in prior versions only the first token was returned; enclose "
                    "contained argument in Group".format(
                        "warn_multiple_tokens_in_named_alternation",
                        name,
                        type(self).__name__,
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, listAllMatches)
4135
4136
class Each(ParseExpression):
    """Requires all given :class:`ParseExpression` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = one_of("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = one_of("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Opt(color_attr) & Opt(size_attr)

        shape_spec.run_tests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color: GREEN size: 20 shape: TRIANGLE posn: 20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """

    def __init__(self, exprs: IterableType[ParserElement], savelist: bool = True):
        """Store the sub-expressions and initialise the matching flags.

        ``mayReturnEmpty`` is true only if every sub-expression may return
        empty (or there are none).  ``initExprGroups`` marks that the
        required/optional groupings still have to be built on the first call
        to :meth:`parseImpl`.
        """
        super().__init__(exprs, savelist)
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self.mayReturnEmpty = True
        self.skipWhitespace = True
        self.initExprGroups = True
        self.saveAsList = True

    def streamline(self) -> ParserElement:
        """Streamline via the superclass, recompute ``mayReturnEmpty``, return ``self``."""
        super().streamline()
        if self.exprs:
            self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        else:
            self.mayReturnEmpty = True
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match every required sub-expression, in whatever order they occur.

        The method works in two phases.  First it repeatedly tries all
        remaining expressions with ``try_parse`` to discover an order in which
        they match.  Then it parses the expressions again in that order with
        ``_parse`` and accumulates their results.

        Returns a ``(loc, ParseResults)`` tuple.  Raises the furthest
        ``ParseFatalException`` recorded in the last matching pass, or a
        ``ParseException`` naming the required expressions that never matched.
        """
        if self.initExprGroups:
            # one-time classification of the sub-expressions, cached on self

            # maps id() of the expression wrapped by each Opt back to the Opt itself
            self.opt1map = dict(
                (id(e.expr), e) for e in self.exprs if isinstance(e, Opt)
            )
            # expressions wrapped by an explicit Opt
            opt1 = [e.expr for e in self.exprs if isinstance(e, Opt)]
            # other expressions that may return empty are treated as optional too
            opt2 = [
                e
                for e in self.exprs
                if e.mayReturnEmpty and not isinstance(e, (Opt, Regex, ZeroOrMore))
            ]
            self.optionals = opt1 + opt2
            # inner expressions of the repetition elements, renamed so that
            # every match is kept under the results name
            self.multioptionals = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, _MultipleMatch)
            ]
            self.multirequired = [
                e.expr.set_results_name(e.resultsName, list_all_matches=True)
                for e in self.exprs
                if isinstance(e, OneOrMore)
            ]
            self.required = [
                e for e in self.exprs if not isinstance(e, (Opt, ZeroOrMore, OneOrMore))
            ]
            self.required += self.multirequired
            self.initExprGroups = False

        # work on copies so the cached groupings survive for later calls
        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt = self.optionals[:]
        multis = self.multioptionals[:]
        matchOrder = []

        keepMatching = True
        failed = []
        fatals = []
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + multis
            # both lists are reset on every pass, so after the loop they
            # describe the final pass only
            failed.clear()
            fatals.clear()
            for e in tmpExprs:
                try:
                    tmpLoc = e.try_parse(instring, tmpLoc, raise_fatal=True)
                except ParseFatalException as pfe:
                    pfe.__traceback__ = None
                    pfe.parserElement = e
                    fatals.append(pfe)
                    failed.append(e)
                except ParseException:
                    failed.append(e)
                else:
                    # record the Opt wrapper when e is the inner expression of one
                    matchOrder.append(self.opt1map.get(id(e), e))
                    # required/optional expressions are consumed once matched;
                    # entries of multis are never removed
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            # stop once a whole pass produced no match at all
            if len(failed) == len(tmpExprs):
                keepMatching = False

        # look for any ParseFatalExceptions
        if fatals:
            if len(fatals) > 1:
                # furthest location first; on a tie at the top, prefer the
                # exception whose parser element has the longer string form
                fatals.sort(key=lambda e: -e.loc)
                if fatals[0].loc == fatals[1].loc:
                    fatals.sort(key=lambda e: (-e.loc, -len(str(e.parserElement))))
            max_fatal = fatals[0]
            raise max_fatal

        if tmpReqd:
            missing = ", ".join(str(e) for e in tmpReqd)
            raise ParseException(
                instring,
                loc,
                "Missing one or more required elements ({})".format(missing),
            )

        # add any unmatched Opts, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e, Opt) and e.expr in tmpOpt]

        # second phase: parse for real, in the order discovered above
        total_results = ParseResults([])
        for e in matchOrder:
            loc, results = e._parse(instring, loc, doActions)
            total_results += results

        return loc, total_results

    def _generateDefaultName(self):
        """Return the default name: the sub-expressions joined by `` & `` in braces."""
        return "{" + " & ".join(str(e) for e in self.exprs) + "}"
4302
4303
class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        """Wrap ``expr`` and copy its matching attributes onto this element.

        A string argument is converted to a parser element using the
        configured literal-string class.  When ``expr`` is not ``None``, its
        index-error/empty-match flags, whitespace settings, ``saveAsList``,
        ``callPreparse`` and ignore expressions are copied to ``self``.
        """
        super().__init__(savelist)
        if isinstance(expr, str_type):
            if issubclass(self._literalStringClass, Token):
                expr = self._literalStringClass(expr)
            elif issubclass(type(self), self._literalStringClass):
                expr = Literal(expr)
            else:
                expr = self._literalStringClass(Literal(expr))
        self.expr = expr
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.set_whitespace_chars(
                expr.whiteChars, copy_defaults=expr.copyDefaultWhiteChars
            )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def recurse(self):
        """Return the wrapped expression in a list, or an empty list if there is none."""
        return [self.expr] if self.expr is not None else []

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate parsing to the wrapped expression.

        Raises ``ParseException`` (carrying the input string and location)
        when no expression has been set.
        """
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        # pass the real input string so the exception reports against it
        raise ParseException(instring, loc, self.errmsg, self)

    def leave_whitespace(self, recursive=True):
        """Apply ``leave_whitespace`` to this element and, if ``recursive``,
        to a copy of the wrapped expression.  Returns ``self``.
        """
        super().leave_whitespace(recursive)

        # check for a missing expression *before* copying it
        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leave_whitespace(recursive)
        return self

    def ignore_whitespace(self, recursive=True):
        """Apply ``ignore_whitespace`` to this element and, if ``recursive``,
        to a copy of the wrapped expression.  Returns ``self``.
        """
        super().ignore_whitespace(recursive)

        # check for a missing expression *before* copying it
        if recursive and self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.ignore_whitespace(recursive)
        return self

    def ignore(self, other):
        """Register ``other`` as an ignorable expression here and on the wrapped expression.

        A ``Suppress`` that is already in ``ignoreExprs`` is not added again.
        The entry that was just appended to ``ignoreExprs`` is the one passed
        on to the wrapped expression.  Returns ``self``.
        """
        if isinstance(other, Suppress):
            if other not in self.ignoreExprs:
                super().ignore(other)
                if self.expr is not None:
                    self.expr.ignore(self.ignoreExprs[-1])
        else:
            super().ignore(other)
            if self.expr is not None:
                self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        """Streamline this element and the wrapped expression; returns ``self``."""
        super().streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def _checkRecursion(self, parseElementList):
        """Raise ``RecursiveGrammarException`` if ``self`` already appears in
        ``parseElementList``; otherwise continue the check in the wrapped
        expression with ``self`` added to the list.
        """
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr._checkRecursion(subRecCheckList)

    def validate(self, validateTrace=None):
        """Validate the wrapped expression, then run a recursion check from ``self``."""
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self):
        """Return ``"<ClassName>:(<wrapped expression>)"`` as the default name."""
        return "{}:({})".format(self.__class__.__name__, str(self.expr))

    # camelCase aliases of the snake_case methods above
    ignoreWhitespace = ignore_whitespace
    leaveWhitespace = leave_whitespace
4395
4396
class IndentedBlock(ParseElementEnhance):
    """
    Expression to match one or more expressions at a given indentation level.
    Useful for parsing text where structure is implied by indentation (like Python source code).
    """

    class _Indent(Empty):
        """Empty match that succeeds only at column ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = "expected indent at column {}".format(ref_col)
            self.add_condition(lambda s, l, t: col(l, s) == ref_col)

    class _IndentGreater(Empty):
        """Empty match that succeeds only at a column greater than ``ref_col``."""

        def __init__(self, ref_col: int):
            super().__init__()
            self.errmsg = "expected indent at column greater than {}".format(ref_col)
            self.add_condition(lambda s, l, t: col(l, s) > ref_col)

    def __init__(
        self, expr: ParserElement, *, recursive: bool = False, grouped: bool = True
    ):
        """
        Parameters:

        - expr - expression matched on each line of the block
        - recursive - (default= ``False``) also accept nested blocks at a
          deeper indentation after each match of ``expr``
        - grouped - (default= ``True``) wrap the block's tokens in a ``Group``
        """
        super().__init__(expr, savelist=True)
        self._recursive = recursive
        self._grouped = grouped
        # column of the enclosing block; nested blocks overwrite this after construction
        self.parent_anchor = 1

    def parseImpl(self, instring, loc, doActions=True):
        """Parse consecutive matches of ``self.expr`` that start at the same column.

        The column of the first non-whitespace position at or after ``loc``
        becomes the indentation of the block.  The parsing expression is
        built on each call, because it depends on that column.  An exception
        raised by the initial probe of ``self.expr`` propagates to the caller.
        """
        # advance parse position to non-whitespace by using an Empty()
        # this should be the column to be used for all subsequent indented lines
        anchor_loc = Empty().preParse(instring, loc)

        # see if self.expr matches at the current location - if not it will raise an exception
        # and no further work is necessary
        # NOTE(review): doActions is passed as the third positional argument of
        # try_parse; other calls in this file pass raise_fatal by keyword -
        # confirm which parameter this is meant to bind to
        self.expr.try_parse(instring, anchor_loc, doActions)

        indent_col = col(anchor_loc, instring)
        peer_detect_expr = self._Indent(indent_col)

        inner_expr = Empty() + peer_detect_expr + self.expr
        if self._recursive:
            sub_indent = self._IndentGreater(indent_col)
            nested_block = IndentedBlock(
                self.expr, recursive=self._recursive, grouped=self._grouped
            )
            nested_block.set_debug(self.debug)
            nested_block.parent_anchor = indent_col
            inner_expr += Opt(sub_indent + nested_block)

        inner_expr.set_name(f"inner {hex(id(inner_expr))[-4:].upper()}@{indent_col}")
        block = OneOrMore(inner_expr)

        # after the block, optionally match a return to the parent's column or the end of input
        trailing_undent = self._Indent(self.parent_anchor) | StringEnd()

        block_expr = Group(block) if self._grouped else block
        # use Opt, as elsewhere in this method; the bare name ``Optional`` is
        # not what this module imports from typing (that is ``OptionalType``)
        return (block_expr + Opt(trailing_undent)).parseImpl(
            instring, anchor_loc, doActions
        )
4459
4460
class AtStringStart(ParseElementEnhance):
    """Wraps an expression so that it can only match at the very start of
    the parse string (location 0)::

        AtStringStart(Word(nums)).parse_string("123")
        # prints ["123"]

        AtStringStart(Word(nums)).parse_string("    123")
        # raises ParseException
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # the pre-parse step is switched off for this element
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the wrapped expression when ``loc`` is 0, otherwise fail."""
        if loc == 0:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at string start")
4480
4481
class AtLineStart(ParseElementEnhance):
    r"""Wraps an expression so that it can only match when the parse
    location is in column 1, i.e. at the beginning of a line of the
    parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (AtLineStart('AAA') + restOfLine).search_string(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # the pre-parse step is switched off for this element
        self.callPreparse = False

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the wrapped expression when ``loc`` is in column 1, otherwise fail."""
        at_line_start = col(loc, instring) == 1
        if at_line_start:
            return super().parseImpl(instring, loc, doActions)
        raise ParseException(instring, loc, "not found at line start")
4513
4514
class FollowedBy(ParseElementEnhance):
    """Positive lookahead for the given parse expression.
    ``FollowedBy`` checks that the expression matches at the current
    position but does *not* move the parsing position forward.  The
    returned token list is always empty; results names defined inside
    the lookahead expression *are* still returned and can be accessed
    by name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        OneOrMore(attr_expr).parse_string("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the lookahead expression and return ``loc`` unchanged."""
        # Empty the returned ParseResults list in place instead of building a
        # new one: that way any named results defined in the FollowedBy
        # expression are kept.
        lookahead = self.expr._parse(instring, loc, doActions=doActions)[1]
        del lookahead[:]

        return loc, lookahead
4549
4550
class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` verifies that the specified parse expression matches
    immediately before the current position; it does not advance the
    parsing position within the input string.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

    - expr - expression that must match prior to the current parse
      location
    - retreat - (default= ``None``) - (int) maximum number of characters
      to lookbehind prior to the current parse location

    The retreat parameter is not required when the lookbehind expression
    is a string, :class:`Literal`, :class:`Keyword`, or a :class:`Word`
    or :class:`CharsNotIn` with a specified exact or maximum length.
    For any other expression, retreat must be specified to give a
    maximum number of characters to look back from the current parse
    position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """

    def __init__(
        self, expr: Union[ParserElement, str], retreat: OptionalType[int] = None
    ):
        super().__init__(expr)
        # work with a copy of the wrapped expression that leaves whitespace
        self.expr = self.expr().leave_whitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False

        # for these argument kinds the lookbehind distance comes from the
        # expression itself and overrides the retreat argument
        lookbehind, is_exact = retreat, True
        if isinstance(expr, str_type):
            lookbehind = len(expr)
        elif isinstance(expr, (Literal, Keyword)):
            lookbehind = expr.matchLen
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            lookbehind = expr.maxLen
        elif isinstance(expr, PositionToken):
            lookbehind = 0
        else:
            is_exact = False
        self.exact = is_exact
        self.retreat = lookbehind

        self.errmsg = "not preceded by " + str(expr)
        self.skipWhitespace = False
        # parse action that empties the token list in place
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, doActions=True):
        """Check that the expression matches just before ``loc``.

        Returns ``loc`` unchanged together with the lookbehind results;
        raises the last recorded parse exception when nothing matches.
        """
        if self.exact:
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg)
            _, ret = self.expr._parse(instring, loc - self.retreat)
            return loc, ret

        # retreat specified a maximum lookbehind window: try each start
        # offset in turn, with the expression followed by StringEnd parsed
        # against the window only
        anchored_expr = self.expr + StringEnd()
        window = instring[max(0, loc - self.retreat) : loc]
        failure = ParseException(instring, loc, self.errmsg)
        for offset in range(1, min(loc, self.retreat + 1) + 1):
            try:
                _, ret = anchored_expr._parse(window, len(window) - offset)
            except ParseBaseException as pbe:
                failure = pbe
            else:
                return loc, ret
        raise failure
4630
4631
class Located(ParseElementEnhance):
    """
    Wraps an expression so that its parsed tokens are returned together
    with the locations in the input string where the match starts and
    ends.

    This helper adds the following results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in Located(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [0, ['ljsdf'], 5]
        [8, ['lksdjjf'], 15]
        [18, ['lkkjj'], 23]

    """

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression and bundle its tokens with both locations."""
        begin = loc
        end, matched = self.expr._parse(instring, begin, doActions, callPreParse=False)

        located = ParseResults([begin, matched, end])
        located["locn_start"] = begin
        located["value"] = matched
        located["locn_end"] = end

        # when a results name is set, return a list so that the name will be
        # attached to the complete group
        return end, ([located] if self.resultsName else located)
4672
4673
class NotAny(ParseElementEnhance):
    """
    Negative lookahead for the given parse expression.
    ``NotAny`` verifies that the specified parse expression does *not*
    match at the current position; it does *not* advance the parsing
    position within the input string.  Also, ``NotAny`` does *not* skip
    over leading whitespace. ``NotAny`` always returns a null token
    list.  May be constructed using the ``'~'`` operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Opt(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infix_notation
        boolean_expr = boolean_term + ZeroOrMore((AND | OR) + boolean_term)

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """

    def __init__(self, expr: Union[ParserElement, str]):
        super().__init__(expr)
        # set the flag directly rather than calling self.leave_whitespace(),
        # which would propagate the setting to the contained expression
        self.skipWhitespace = False

        self.mayReturnEmpty = True
        unwanted = str(self.expr)
        self.errmsg = "Found unwanted token, " + unwanted

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens unless the wrapped expression can parse at ``loc``."""
        if not self.expr.can_parse_next(instring, loc):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def _generateDefaultName(self):
        """Return the default name: ``~`` followed by the wrapped expression in braces."""
        inner = str(self.expr)
        return "~{" + inner + "}"
4715
4716
class _MultipleMatch(ParseElementEnhance):
    """Base class for repetition of an expression, with an optional
    terminating sentinel (``stop_on``) that ends the repetition.
    """

    def __init__(
        self,
        expr: ParserElement,
        stop_on: OptionalType[Union[ParserElement, str]] = None,
        *,
        stopOn: OptionalType[Union[ParserElement, str]] = None,
    ):
        """
        Parameters:

        - expr - expression to be matched repeatedly
        - stop_on - (default= ``None``) - expression (or string) for a
          terminating sentinel
        - stopOn - keyword-only camelCase spelling of ``stop_on``; used in
          preference to ``stop_on`` when it is truthy
        """
        super().__init__(expr)
        stopOn = stopOn or stop_on
        self.saveAsList = True
        ender = stopOn
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        self.stopOn(ender)

    def stopOn(self, ender):
        """Set (or clear, with ``None``) the sentinel that ends the repetition.

        The sentinel is stored negated in ``self.not_ender``, so that parsing
        it fails exactly where the sentinel would match.  Returns ``self``.
        """
        if isinstance(ender, str_type):
            ender = self._literalStringClass(ender)
        self.not_ender = ~ender if ender is not None else None
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the expression once, then as many more times as possible.

        A failure of the first match (or of the sentinel check before it)
        propagates to the caller.  A ``ParseException`` or ``IndexError`` on
        a later repetition simply ends the loop, and the tokens collected so
        far are returned with the location reached.
        """
        self_expr_parse = self.expr._parse
        self_skip_ignorables = self._skipIgnorables
        check_ender = self.not_ender is not None
        if check_ender:
            # use the snake_case name, as the rest of this module does
            try_not_ender = self.not_ender.try_parse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if check_ender:
            try_not_ender(instring, loc)
        loc, tokens = self_expr_parse(instring, loc, doActions)
        try:
            hasIgnoreExprs = bool(self.ignoreExprs)
            while True:
                if check_ender:
                    try_not_ender(instring, loc)
                if hasIgnoreExprs:
                    preloc = self_skip_ignorables(instring, loc)
                else:
                    preloc = loc
                loc, tmptokens = self_expr_parse(instring, preloc, doActions)
                # keep results that carry tokens or named results
                if tmptokens or tmptokens.haskeys():
                    tokens += tmptokens
        except (ParseException, IndexError):
            # deliberate: the first failed repetition is the normal loop exit
            pass

        return loc, tokens

    def _setResultsName(self, name, listAllMatches=False):
        """Assign a results name, warning about named contained expressions first.

        When the ``warn_ungrouped_named_tokens_in_collection`` diagnostic is
        enabled in ``__diag__`` and not suppressed on this element, a warning
        is issued for the wrapped expression and each expression returned by
        its ``recurse()`` that has a results name and has not suppressed the
        diagnostic.  The name is then applied by the superclass
        implementation.
        """
        if (
            __diag__.warn_ungrouped_named_tokens_in_collection
            and Diagnostics.warn_ungrouped_named_tokens_in_collection
            not in self.suppress_warnings_
        ):
            for e in [self.expr] + self.expr.recurse():
                if (
                    isinstance(e, ParserElement)
                    and e.resultsName
                    and Diagnostics.warn_ungrouped_named_tokens_in_collection
                    not in e.suppress_warnings_
                ):
                    warnings.warn(
                        "{}: setting results name {!r} on {} expression "
                        "collides with {!r} on contained expression".format(
                            "warn_ungrouped_named_tokens_in_collection",
                            name,
                            type(self).__name__,
                            e.resultsName,
                        ),
                        stacklevel=3,
                    )

        return super()._setResultsName(name, listAllMatches)
4793
4794
class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:
    - ``expr`` - expression that must match one or more times
    - ``stop_on`` - (default= ``None``) - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).set_parse_action(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parse_string(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stop_on attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        OneOrMore(attr_expr).parse_string(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parse_string(text).pprint()
    """

    def _generateDefaultName(self):
        """Return the default display name, of the form ``{expr}...``."""
        inner = str(self.expr)
        return "".join(("{", inner, "}..."))
4824
4825
class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:
    - ``expr`` - expression that must match zero or more times
    - ``stop_on`` - expression for a terminating sentinel
      (only required if the sentinel would ordinarily match the repetition
      expression) - (default= ``None``)

    Example: similar to :class:`OneOrMore`
    """

    def __init__(
        self,
        expr: ParserElement,
        stop_on: OptionalType[Union[ParserElement, str]] = None,
        *,
        stopOn: OptionalType[Union[ParserElement, str]] = None,
    ):
        sentinel = stopOn or stop_on
        super().__init__(expr, stopOn=sentinel)
        # zero repetitions is a valid match
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Delegate to the one-or-more implementation; if that raises
        ``ParseException`` or ``IndexError``, return the unchanged ``loc``
        with an empty :class:`ParseResults` carrying this element's
        results name.
        """
        try:
            outcome = super().parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            outcome = loc, ParseResults([], name=self.resultsName)
        return outcome

    def _generateDefaultName(self):
        """Return the default display name, of the form ``[expr]...``."""
        inner = str(self.expr)
        return "".join(("[", inner, "]..."))
4857
4858
class _NullToken:
    """Placeholder object that is falsy and renders as the empty string."""

    def __bool__(self):
        """Always evaluate as false in a boolean context."""
        return False

    def __str__(self):
        """Always render as an empty string."""
        return str()
4865
4866
class Opt(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
    - ``expr`` - expression that may match at most once; if it does not
      match, parsing continues from the same location
    - ``default`` (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Opt('-' + Word(nums, exact=4)))
        zip.run_tests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """

    # sentinel meaning "no default was supplied"; compared by identity so
    # that any real value (including None) can be used as a default
    __optionalNotMatched = _NullToken()

    def __init__(
        self, expr: Union[ParserElement, str], default: Any = __optionalNotMatched
    ):
        super().__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Try to parse the wrapped expression at ``loc``.

        On ``ParseException`` or ``IndexError`` the location is left
        unchanged and the tokens are either empty (no default configured) or
        the configured default, also stored under the wrapped expression's
        results name when it has one.
        """
        wrapped = self.expr
        try:
            loc, tokens = wrapped._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []
            elif wrapped.resultsName:
                tokens = ParseResults([fallback])
                tokens[wrapped.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def _generateDefaultName(self):
        """Return the default display name, of the form ``[expr]``."""
        inner = str(self.expr)
        # strip off redundant inner {}'s
        while len(inner) > 1 and inner[0] == "{" and inner[-1] == "}":
            inner = inner[1:-1]
        return "".join(("[", inner, "]"))
4938
4939
# ``Optional`` is a second name bound to the same class object as ``Opt``.
# ``typing.Optional`` is imported at the top of this module under the name
# ``OptionalType``, so the annotations in this file are not affected by this
# binding.
Optional = Opt
4941
4942
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:
    - ``expr`` - target expression marking the end of the data to be skipped
    - ``include`` - if ``True``, the target expression is also parsed
      (the skipped text and target expression are returned as a 2-element
      list) (default= ``False``).
    - ``ignore`` - (default= ``None``) used to define grammars (typically quoted strings and
      comments) that might contain false matches to the target expression;
      a string is converted to a literal expression
    - ``fail_on`` - (default= ``None``) define expressions that are not allowed to be
      included in the skipped text; if found before the target expression is found,
      the :class:`SkipTo` is not a match; a string is converted to a literal
      expression
    - ``failOn`` - keyword-only camelCase spelling of ``fail_on``; used in
      preference to ``fail_on`` when it is truthy

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quoted_string)
        string_data.set_parse_action(token_map(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.search_string(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """

    def __init__(
        self,
        other: Union[ParserElement, str],
        include: bool = False,
        ignore: OptionalType[Union[ParserElement, str]] = None,
        fail_on: OptionalType[Union[ParserElement, str]] = None,
        *,
        failOn: OptionalType[Union[ParserElement, str]] = None,
    ):
        super().__init__(other)
        failOn = failOn or fail_on
        # parseImpl calls .tryParse on the ignore expression, so a plain
        # string must be turned into an expression first (as is done for
        # failOn below)
        if isinstance(ignore, str_type):
            ignore = self._literalStringClass(ignore)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, str_type):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for " + str(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Scan forward from ``loc`` one character at a time until the target
        expression parses, and return the skipped text as the result.

        Returns ``(loc, results)`` where ``results`` holds the skipped text
        and, when ``include`` was requested, the tokens of the target
        expression as well (``loc`` is then advanced past the target).

        Raises ``ParseException`` when the end of ``instring`` is passed
        without the target expression matching.
        """
        startloc = loc
        instrlen = len(instring)
        self_expr_parse = self.expr._parse
        self_failOn_canParseNext = (
            self.failOn.canParseNext if self.failOn is not None else None
        )
        self_ignoreExpr_tryParse = (
            self.ignoreExpr.tryParse if self.ignoreExpr is not None else None
        )

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): this break bypasses the while/else failure
                # branch below, so control continues into the result-building
                # code as if the target had matched at tmploc. The class
                # docstring says a fail_on hit means "not a match" - confirm
                # which behavior is intended.
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while True:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                # probe only: actions are deferred until the match is accepted
                self_expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # re-parse the target for real, honoring doActions this time
            loc, mat = self_expr_parse(instring, loc, doActions, callPreParse=False)
            skipresult += mat

        return loc, skipresult
5075
5076
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the ``'<<'`` operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, ``'|'`` has a lower precedence than ``'<<'``, so that::

        fwd_expr << a | b | c

    will actually be evaluated as::

        (fwd_expr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwd_expr << (a | b | c)

    Converting to use the ``'<<='`` operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """

    def __init__(self, other: OptionalType[Union[ParserElement, str]] = None):
        # remember the stack entry of whoever created this Forward, so that
        # __del__ can attribute its warning to that file and line
        self.caller_frame = traceback.extract_stack(limit=2)[0]
        super().__init__(other, savelist=False)
        # stack entry of the most recent '<<' assignment; compared in __or__
        self.lshift_line = None

    def __lshift__(self, other):
        """
        Attach ``other`` as the expression this ``Forward`` stands for.

        A string is converted using ``self._literalStringClass``. Parsing
        attributes (index-error/empty flags, whitespace settings, list-saving
        flag) are copied from the attached expression and its ignore
        expressions are appended to this element's. Returns ``self``.
        """
        if hasattr(self, "caller_frame"):
            del self.caller_frame
        if isinstance(other, str_type):
            other = self._literalStringClass(other)
        self.expr = other
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.set_whitespace_chars(
            self.expr.whiteChars, copy_defaults=self.expr.copyDefaultWhiteChars
        )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        # recorded so that __or__ can detect ``fwd << a | b`` on one line
        self.lshift_line = traceback.extract_stack(limit=2)[-2]
        return self

    def __ilshift__(self, other):
        """Implement ``fwd <<= other`` by delegating to ``fwd << other``."""
        return self << other

    def __or__(self, other):
        """
        Combine with ``other`` using the parent class's ``|`` handling,
        warning first (when the diagnostic is enabled and not suppressed) if
        the caller's stack entry equals the one recorded by the last ``<<``.
        """
        caller_line = traceback.extract_stack(limit=2)[-2]
        if (
            __diag__.warn_on_match_first_with_lshift_operator
            and caller_line == self.lshift_line
            and Diagnostics.warn_on_match_first_with_lshift_operator
            not in self.suppress_warnings_
        ):
            warnings.warn(
                "using '<<' operator with '|' is probably an error, use '<<='",
                stacklevel=2,
            )
        ret = super().__or__(other)
        return ret

    def __del__(self):
        # see if we are getting dropped because of '=' reassignment of var instead of '<<=' or '<<'
        if (
            self.expr is None
            and __diag__.warn_on_assignment_to_Forward
            and Diagnostics.warn_on_assignment_to_Forward not in self.suppress_warnings_
        ):
            # report against the location captured in __init__ rather than
            # wherever the object happens to be finalized
            warnings.warn_explicit(
                "Forward defined here but no expression attached later using '<<=' or '<<'",
                UserWarning,
                filename=self.caller_frame.filename,
                lineno=self.caller_frame.lineno,
            )

    def parseImpl(self, instring, loc, doActions=True):
        """
        Parse using the attached expression.

        Warns (when the diagnostic is enabled and not suppressed) if no
        expression has been attached. When left recursion is not enabled
        this simply delegates to the parent implementation; otherwise the
        bounded-recursion algorithm described in the comments below is used.
        """
        if (
            self.expr is None
            and __diag__.warn_on_parse_using_empty_Forward
            and Diagnostics.warn_on_parse_using_empty_Forward
            not in self.suppress_warnings_
        ):
            # walk stack until parse_string, scan_string, search_string, or transform_string is found
            parse_fns = [
                "parse_string",
                "scan_string",
                "search_string",
                "transform_string",
            ]
            tb = traceback.extract_stack(limit=200)
            for i, frm in enumerate(reversed(tb), start=1):
                if frm.name in parse_fns:
                    stacklevel = i + 1
                    break
            else:
                stacklevel = 2
            warnings.warn(
                "Forward expression was never assigned a value, will not parse any input",
                stacklevel=stacklevel,
            )
        if not ParserElement._left_recursion_enabled:
            return super().parseImpl(instring, loc, doActions)
        # ## Bounded Recursion algorithm ##
        # Recursion only needs to be processed at ``Forward`` elements, since they are
        # the only ones that can actually refer to themselves. The general idea is
        # to handle recursion stepwise: We start at no recursion, then recurse once,
        # recurse twice, ..., until more recursion offers no benefit (we hit the bound).
        #
        # The "trick" here is that each ``Forward`` gets evaluated in two contexts
        # - to *match* a specific recursion level, and
        # - to *search* the bounded recursion level
        # and the two run concurrently. The *search* must *match* each recursion level
        # to find the best possible match. This is handled by a memo table, which
        # provides the previous match to the next level match attempt.
        #
        # See also "Left Recursion in Parsing Expression Grammars", Medeiros et al.
        #
        # There is a complication since we not only *parse* but also *transform* via
        # actions: We do not want to run the actions too often while expanding. Thus,
        # we expand using `doActions=False` and only run `doActions=True` if the next
        # recursion level is acceptable.
        with ParserElement.recursion_lock:
            memo = ParserElement.recursion_memos
            try:
                # we are parsing at a specific recursion expansion - use it as-is
                prev_loc, prev_result = memo[loc, self, doActions]
                if isinstance(prev_result, Exception):
                    raise prev_result
                return prev_loc, prev_result.copy()
            except KeyError:
                act_key = (loc, self, True)
                peek_key = (loc, self, False)
                # we are searching for the best recursion expansion - keep on improving
                # both `doActions` cases must be tracked separately here!
                prev_loc, prev_peek = memo[peek_key] = (
                    loc - 1,
                    ParseException(
                        instring, loc, "Forward recursion without base case", self
                    ),
                )
                if doActions:
                    memo[act_key] = memo[peek_key]
                while True:
                    try:
                        new_loc, new_peek = super().parseImpl(instring, loc, False)
                    except ParseException:
                        # we failed before getting any match - do not hide the error
                        if isinstance(prev_peek, Exception):
                            raise
                        new_loc, new_peek = prev_loc, prev_peek
                    # the match did not get better: we are done
                    if new_loc <= prev_loc:
                        if doActions:
                            # replace the match for doActions=False as well,
                            # in case the action did backtrack
                            prev_loc, prev_result = memo[peek_key] = memo[act_key]
                            del memo[peek_key], memo[act_key]
                            return prev_loc, prev_result.copy()
                        del memo[peek_key]
                        return prev_loc, prev_peek.copy()
                    # the match did get better: see if we can improve further
                    else:
                        if doActions:
                            try:
                                memo[act_key] = super().parseImpl(instring, loc, True)
                            except ParseException as e:
                                memo[peek_key] = memo[act_key] = (new_loc, e)
                                raise
                        prev_loc, prev_peek = memo[peek_key] = new_loc, new_peek

    def leave_whitespace(self, recursive=True):
        """
        Turn off whitespace skipping on this element and return ``self``.
        ``recursive`` is accepted but not used here; the attached expression
        is not modified.
        """
        self.skipWhitespace = False
        return self

    def ignore_whitespace(self, recursive=True):
        """
        Turn on whitespace skipping on this element and return ``self``.
        ``recursive`` is accepted but not used here; the attached expression
        is not modified.
        """
        self.skipWhitespace = True
        return self

    def streamline(self):
        """
        Streamline the attached expression, if any, at most once per element.
        The ``streamlined`` flag is set before descending. Returns ``self``.
        """
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None):
        """
        Validate the attached expression, passing along the list of elements
        already visited (``validateTrace``) extended with this one, and then
        run this element's own recursion check.
        """
        if validateTrace is None:
            validateTrace = []

        # only descend if this element has not been visited on this path
        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self._checkRecursion([])

    def _generateDefaultName(self):
        """
        Return the default display name ``"<ClassName>: <expr>"``, with the
        expression text truncated to 1000 characters, ``"None"`` when no
        expression is attached, or ``"..."`` when building the expression
        text raises an ``Exception``.
        """
        # Avoid infinite recursion by setting a temporary _defaultName
        self._defaultName = ": ..."

        # Use the string representation of main expression.
        retString = "..."
        try:
            if self.expr is not None:
                retString = str(self.expr)[:1000]
            else:
                retString = "None"
        except Exception:
            # keep the "..." placeholder. This replaces a ``return`` inside
            # ``finally``, which also discarded KeyboardInterrupt/SystemExit.
            pass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        """
        Return a copy of this element. When no expression is attached yet,
        return a new ``Forward`` that refers to this one instead, so the copy
        follows whatever is attached here later.
        """
        if self.expr is not None:
            return super().copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, list_all_matches=False):
        """
        Assign a results name, first warning (when the
        ``warn_name_set_on_empty_Forward`` diagnostic is enabled and not
        suppressed) if no expression has been attached yet.
        """
        if (
            __diag__.warn_name_set_on_empty_Forward
            and Diagnostics.warn_name_set_on_empty_Forward
            not in self.suppress_warnings_
        ):
            if self.expr is None:
                warnings.warn(
                    "{}: setting results name {!r} on {} expression "
                    "that has no contained expression".format(
                        "warn_name_set_on_empty_Forward", name, type(self).__name__
                    ),
                    stacklevel=3,
                )

        return super()._setResultsName(name, list_all_matches)

    # camelCase aliases for the snake_case methods above
    ignoreWhitespace = ignore_whitespace
    leaveWhitespace = leave_whitespace
5321
5322
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """

    def __init__(self, expr: Union[ParserElement, str], savelist=False):
        # ``savelist`` is accepted in the signature but is not forwarded to
        # the parent initializer; the list-saving flag is switched off below
        super().__init__(expr)
        self.saveAsList = False
5331
5332
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Parameters:
    - ``expr`` - expression whose tokens are concatenated
    - ``join_string`` - (default= ``""``) separator handed to the token
      stringification step
    - ``adjacent`` - (default= ``True``) require the matched pieces to be
      contiguous in the input
    - ``joinString`` - keyword-only camelCase spelling of ``join_string``;
      used instead of ``join_string`` whenever it is not ``None``

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parse_string('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parse_string('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parse_string('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parse_string('3. 1416')) # -> Exception: Expected W:(0123...)
    """

    def __init__(
        self,
        expr: ParserElement,
        join_string: str = "",
        adjacent: bool = True,
        *,
        joinString: OptionalType[str] = None,
    ):
        super().__init__(expr)
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leave_whitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = join_string if joinString is None else joinString
        self.callPreparse = True

    def ignore(self, other):
        """
        Register ``other`` as an ignorable expression and return ``self``.

        When ``adjacent`` is set, ``ParserElement.ignore`` is invoked
        directly on this element; otherwise the inherited ``ignore`` is used.
        """
        if not self.adjacent:
            super().ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """
        Replace the matched tokens with one string built from them.

        Starts from a copy of ``tokenlist`` with its list items removed, then
        appends the concatenated text. The result is wrapped in a list when
        this element has a results name and the result has keys.
        """
        combined = tokenlist.copy()
        del combined[:]
        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        needs_wrapping = self.resultsName and combined.haskeys()
        return [combined] if needs_wrapping else combined
5388
5389
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for
    returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    The optional ``aslist`` argument when set to True will return the
    parsed tokens as a Python list instead of a pyparsing ParseResults.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Opt(delimited_list(term))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Opt(delimited_list(term)))
        print(func.parse_string("fn a, b, 100"))
        # -> ['fn', ['a', 'b', '100']]
    """

    def __init__(self, expr: ParserElement, aslist: bool = False):
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonList = aslist

    def postParse(self, instring, loc, tokenlist):
        """
        Nest the matched tokens one level deeper: either as a single-item
        list holding ``tokenlist``, or, when ``aslist`` was requested, as a
        ``ParseResults.List`` built from the tokens.
        """
        if not self._asPythonList:
            return [tokenlist]

        if isinstance(tokenlist, ParseResults):
            plain_tokens = tokenlist.asList()
        else:
            plain_tokens = list(tokenlist)
        return ParseResults.List(plain_tokens)
5425
5426
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as an item key.

    The optional ``asdict`` argument when set to True will return the
    parsed tokens as a Python dict instead of a pyparsing ParseResults.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parse_string(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parse_string(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.as_dict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """

    def __init__(self, expr: ParserElement, asdict: bool = False):
        super().__init__(expr)
        self.saveAsList = True
        self._asPythonDict = asdict

    def postParse(self, instring, loc, tokenlist):
        """
        Register each item of ``tokenlist`` under a key taken from the
        item's first element (an ``int`` key is converted to its string form).

        The stored value is ``""`` for a one-element item, the second element
        for a two-element item whose second element is not a
        :class:`ParseResults`, and otherwise the remainder of the item after
        its first element (unwrapped when that remainder is a single value
        without keys).

        Raises ``TypeError`` when an item cannot be copied.
        """
        for position, entry in enumerate(tokenlist):
            entry_size = len(entry)
            if entry_size == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                key = str(key).strip()

            if entry_size == 1:
                value = ""
            elif entry_size == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                try:
                    remainder = entry.copy()
                except Exception:
                    raise TypeError(
                        "could not extract dict values from parsed results"
                        " - Dict expression must contain Grouped expressions"
                    ) from None

                del remainder[0]

                keeps_structure = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys()
                )
                value = remainder if keeps_structure else remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        result = tokenlist.as_dict() if self._asPythonDict else tokenlist
        return [result] if self.resultsName else result
5512
5513
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parse_string(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parse_string(source))

        # Skipped text (using '...') can be suppressed as well
        source = "lead in START relevant text END trailing text"
        start_marker = Keyword("START")
        end_marker = Keyword("END")
        find_body = Suppress(...) + start_marker + ... + end_marker
        print(find_body.parse_string(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
        ['START', 'relevant text ', 'END']

    (See also :class:`delimited_list`.)
    """

    def __init__(self, expr: Union[ParserElement, str], savelist: bool = False):
        # ``Suppress(...)`` starts out wrapping a pending-skip placeholder,
        # which __add__ / __sub__ replace once the next expression is known
        if expr is Ellipsis:
            expr = _PendingSkip(NoMatch())
        super().__init__(expr)

    def __add__(self, other):
        """
        Implement ``self + other``. For a pending ``Suppress(...)`` this
        becomes a suppressed :class:`SkipTo` targeting ``other``, followed
        by ``other``.
        """
        if not isinstance(self.expr, _PendingSkip):
            return super().__add__(other)
        return Suppress(SkipTo(other)) + other

    def __sub__(self, other):
        """
        Implement ``self - other``. For a pending ``Suppress(...)`` this
        becomes a suppressed :class:`SkipTo` targeting ``other``, combined
        with ``other`` using ``-``.
        """
        if not isinstance(self.expr, _PendingSkip):
            return super().__sub__(other)
        return Suppress(SkipTo(other)) - other

    def postParse(self, instring, loc, tokenlist):
        """Discard the matched tokens by returning an empty list."""
        return []

    def suppress(self):
        """Return ``self``; this element is already suppressing its tokens."""
        return self
5567
5568
def trace_parse_action(f: ParseAction):
    """Decorator that reports every call of a parse action on ``sys.stderr``.

    Before the wrapped parse action runs, a line of the form
    ``">>entering <name>(line: <source line>, <location>, <tokens>)"`` is
    written. Afterwards a ``"<<leaving <name> (ret: <value>)"`` line is
    written, or ``"<<leaving <name> (exception: <exc>)"`` if the parse action
    raised; in that case the exception is re-raised unchanged.

    Example::

        wd = Word(alphas)

        @trace_parse_action
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).set_parse_action(remove_duplicate_chars)
        print(wds.parse_string("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    # NOTE(review): presumably _trim_arity adapts ``f`` so it can be called
    # with the full argument list below - confirm against its definition.
    f = _trim_arity(f)

    def z(*call_args):
        label = f.__name__
        # the last three positional arguments are (string, location, tokens)
        s, l, t = call_args[-3:]
        if len(call_args) > 3:
            # an extra leading argument is present: qualify the reported
            # name with that argument's class name
            label = call_args[0].__class__.__name__ + "." + label

        entering = ">>entering {}(line: {!r}, {}, {!r})\n".format(
            label, line(l, s), l, t
        )
        sys.stderr.write(entering)

        try:
            ret = f(*call_args)
        except Exception as exc:
            sys.stderr.write("<<leaving {} (exception: {})\n".format(label, exc))
            raise
        else:
            sys.stderr.write("<<leaving {} (ret: {!r})\n".format(label, ret))
            return ret

    # report the wrapper under the parse action's own name
    z.__name__ = f.__name__
    return z
5614
5615
# convenience constants for positional expressions
# Each constant is one module-level instance of the corresponding class,
# created with no arguments and given a display name (via ``set_name``)
# identical to the variable it is bound to.
empty = Empty().set_name("empty")
line_start = LineStart().set_name("line_start")
line_end = LineEnd().set_name("line_end")
string_start = StringStart().set_name("string_start")
string_end = StringEnd().set_name("string_end")
5622
# Grammar used by srange() to parse a regex-style "[...]" character set.

# A backslash followed by exactly one of the listed punctuation characters
# (or a space); the parse action keeps only the escaped character.
_escapedPunc = Word(_bslash, r"\[]-*.$+^?()~ ", exact=2).set_parse_action(
    lambda s, l, t: t[0][1]
)
# A hex escape, ``\x##`` or the legacy ``\0x##`` (``x`` may be upper case),
# converted to the character with that code point. The value is everything
# after the x marker. ``str.lstrip(r"\0x")`` must not be used to isolate it:
# lstrip removes a *set* of characters, so it also eats the leading zero
# digits of the value itself (``\x00`` became ``""`` and int() raised), and
# it never removes an upper-case ``X`` that the regex accepts.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").set_parse_action(
    lambda s, l, t: chr(int(t[0].lower().partition("x")[2], 16))
)
# An octal escape: backslash, ``0``, then octal digits. ``t[0][1:]`` drops
# only the backslash; the remaining digits are read base 8.
_escapedOctChar = Regex(r"\\0[0-7]+").set_parse_action(
    lambda s, l, t: chr(int(t[0][1:], 8))
)
# One set member: any of the escapes above, or any single character other
# than a backslash or the closing bracket.
_singleChar = (
    _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r"\]", exact=1)
)
# A range ``a-z``: two single characters with the dash suppressed, grouped so
# srange() can tell a range (a ParseResults) from a plain character (a str).
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# The full bracket expression: "[", an optional "^" stored under the results
# name "negate", one or more ranges/characters stored under "body", and "]".
_reBracketExpr = (
    Literal("[")
    + Opt("^").set_results_name("negate")
    + Group(OneOrMore(_charRange | _singleChar)).set_results_name("body")
    + "]"
)
5642
5643
def srange(s):
    r"""Expand a regex-style ``'[]'`` character set into the string of all
    characters it contains, for use when constructing a :class:`Word`::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The argument must be wrapped in ``[]``. Inside the brackets each item
    may be:

    - a single character
    - a backslash-escaped character (such as ``\-`` or ``\]``)
    - a hex escape introduced by ``'\x'`` (``\x21`` is ``'!'``); the older
      ``\0x##`` spelling is still accepted for backwards compatibility
    - an octal escape introduced by ``'\0'`` (``\041`` is ``'!'``)
    - a range built from any two of the above joined by a dash
      (``'a-z'``, etc.)
    - any mixture of these (``'aeiouy'``, ``'a-zA-Z0-9_$'``, etc.)

    Returns the expanded characters as one string. If the input cannot be
    parsed or expanded for any reason, an empty string is returned instead
    of raising.
    """

    def expand_part(p):
        # A range arrives as a two-element ParseResults (first, last);
        # anything else is already the literal character.
        if isinstance(p, ParseResults):
            return "".join(chr(c) for c in range(ord(p[0]), ord(p[1]) + 1))
        return p

    try:
        body = _reBracketExpr.parse_string(s).body
        return "".join(map(expand_part, body))
    except Exception:
        # best effort by design: any failure yields an empty character set
        return ""
5679
5680
def token_map(func, *args):
    """Build a parse action that applies ``func`` to every token in a
    :class:`ParseResults` list and returns the converted values as a list.

    Any extra positional ``args`` are passed to ``func`` after the token, so
    ``hex_integer = Word(hexnums).set_parse_action(token_map(int, 16))``
    converts each matched token with ``int(token, 16)``.

    The returned parse action's ``__name__`` is taken from ``func`` (or from
    the name of ``func``'s class when ``func`` has no ``__name__``).

    Example (compare the last to example in :class:`ParserElement.transform_string`)::

        hex_ints = OneOrMore(Word(hexnums)).set_parse_action(token_map(int, 16))
        hex_ints.run_tests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).set_parse_action(token_map(str.upper))
        OneOrMore(upperword).run_tests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).set_parse_action(token_map(str.title))
        OneOrMore(wd).set_parse_action(' '.join).run_tests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    # Name to report for the generated parse action; the class-name fallback
    # covers callables that carry no ``__name__`` of their own.
    fallback_name = func.__class__.__name__
    reported_name = getattr(func, "__name__", fallback_name)

    def pa(s, l, t):
        converted = []
        for tokn in t:
            converted.append(func(tokn, *args))
        return converted

    pa.__name__ = reported_name
    return pa
5725
5726
def autoname_elements():
    """
    Give every not-yet-named parser element in the caller's local scope
    the name of the variable it is bound to. Intended to simplify
    mass-naming of parser elements when generating railroad diagrams
    with named subdiagrams.
    """
    # The frame lookup must stay directly in this function body so that
    # ``f_back`` refers to the function's caller.
    caller_locals = sys._getframe().f_back.f_locals
    for var_name, candidate in caller_locals.items():
        if not isinstance(candidate, ParserElement):
            continue
        if candidate.customName:
            # already has a custom name; leave it alone
            continue
        candidate.set_name(var_name)
5735
5736
# Predefined quoted-string expressions.
#
# Each Regex matches the opening quote and the string body; the closing
# quote is a separate literal appended with ``+`` and the pair is wrapped in
# Combine. The body is zero or more of:
#   - any character other than the quote itself, newline, carriage return
#     or backslash
#   - a doubled quote character ("" or '')
#   - a backslash followed by any character except ``x``, or by ``x`` and
#     one or more hex digits
dbl_quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).set_name("string enclosed in double quotes")

# Same pattern as above with single quotes as the delimiter.
sgl_quoted_string = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("string enclosed in single quotes")

# Either form. ``+`` binds tighter than ``|``, so this is
# (double-quoted regex + '"') | (single-quoted regex + "'").
quoted_string = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
    | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).set_name("quotedString using single or double quotes")

# A quoted string (a copy of ``quoted_string``) prefixed with a literal "u".
unicode_string = Combine("u" + quoted_string.copy()).set_name("unicode string literal")
5751
5752
# Character-set strings expanded by srange() from hex escapes written in the
# legacy ``\0x##`` form.
# alphas8bit: code points 0xc0-0xd6, 0xd8-0xf6 and 0xf8-0xff.
# punc8bit:   code points 0xa1-0xbf plus 0xd7 and 0xf7 (the two values
#             skipped by the alphas8bit ranges).
# NOTE(review): presumably the 8-bit (Latin-1) letters and punctuation - confirm.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
5755
# build list of built-in expressions, for future reference if a global default value
# gets updated
# ``vars()`` at module level is the module namespace, so this collects every
# ParserElement instance bound to a module-level name at this point in the
# file; names bound to ParserElements after this line are not included.
_builtin_exprs = [v for v in vars().values() if isinstance(v, ParserElement)]
5759
# backward compatibility names
# Each camelCase name is a plain alias bound to the same object as its
# snake_case counterpart, so both spellings can be used interchangeably.
tokenMap = token_map
conditionAsParseAction = condition_as_parse_action
nullDebugAction = null_debug_action
sglQuotedString = sgl_quoted_string
dblQuotedString = dbl_quoted_string
quotedString = quoted_string
unicodeString = unicode_string
lineStart = line_start
lineEnd = line_end
stringStart = string_start
stringEnd = string_end
traceParseAction = trace_parse_action
5773