1# -*- coding: utf-8 -*-
2# module pyparsing.py
3#
4# Copyright (c) 2003-2019  Paul T. McGuire
5#
6# Permission is hereby granted, free of charge, to any person obtaining
7# a copy of this software and associated documentation files (the
8# "Software"), to deal in the Software without restriction, including
9# without limitation the rights to use, copy, modify, merge, publish,
10# distribute, sublicense, and/or sell copies of the Software, and to
11# permit persons to whom the Software is furnished to do so, subject to
12# the following conditions:
13#
14# The above copyright notice and this permission notice shall be
15# included in all copies or substantial portions of the Software.
16#
17# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24#
25
26__doc__ = \
27"""
28pyparsing module - Classes and methods to define and execute parsing grammars
29=============================================================================
30
31The pyparsing module is an alternative approach to creating and
32executing simple grammars, vs. the traditional lex/yacc approach, or the
33use of regular expressions.  With pyparsing, you don't need to learn
34a new syntax for defining grammars or matching expressions - the parsing
35module provides a library of classes that you use to construct the
36grammar directly in Python.
37
38Here is a program to parse "Hello, World!" (or any greeting of the form
39``"<salutation>, <addressee>!"``), built up using :class:`Word`,
40:class:`Literal`, and :class:`And` elements
41(the :class:`'+'<ParserElement.__add__>` operators create :class:`And` expressions,
42and the strings are auto-converted to :class:`Literal` expressions)::
43
44    from pyparsing import Word, alphas
45
46    # define grammar of a greeting
47    greet = Word(alphas) + "," + Word(alphas) + "!"
48
49    hello = "Hello, World!"
50    print (hello, "->", greet.parseString(hello))
51
52The program outputs the following::
53
54    Hello, World! -> ['Hello', ',', 'World', '!']
55
56The Python representation of the grammar is quite readable, owing to the
57self-explanatory class names, and the use of '+', '|' and '^' operators.
58
59The :class:`ParseResults` object returned from
60:class:`ParserElement.parseString` can be
61accessed as a nested list, a dictionary, or an object with named
62attributes.
63
64The pyparsing module handles some of the problems that are typically
65vexing when writing text parsers:
66
67  - extra or missing whitespace (the above program will also handle
68    "Hello,World!", "Hello  ,  World  !", etc.)
69  - quoted strings
70  - embedded comments
71
72
73Getting Started -
74-----------------
75Visit the classes :class:`ParserElement` and :class:`ParseResults` to
76see the base classes that most other pyparsing
77classes inherit from. Use the docstrings for examples of how to:
78
79 - construct literal match expressions from :class:`Literal` and
80   :class:`CaselessLiteral` classes
81 - construct character word-group expressions using the :class:`Word`
82   class
83 - see how to create repetitive expressions using :class:`ZeroOrMore`
84   and :class:`OneOrMore` classes
85 - use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`,
86   and :class:`'&'<Each>` operators to combine simple expressions into
87   more complex ones
88 - associate names with your parsed results using
89   :class:`ParserElement.setResultsName`
90 - access the parsed data, which is returned as a :class:`ParseResults`
91   object
92 - find some helpful expression short-cuts like :class:`delimitedList`
93   and :class:`oneOf`
94 - find more useful common expressions in the :class:`pyparsing_common`
95   namespace class
96"""
97
# Package metadata: release number, timestamp string for that release, and author contact.
__version__ = "2.4.7"
__versionTime__ = "30 Mar 2020 00:43 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
101
102import string
103from weakref import ref as wkref
104import copy
105import sys
106import warnings
107import re
108import sre_constants
109import collections
110import pprint
111import traceback
112import types
113from datetime import datetime
114from operator import itemgetter
115import itertools
116from functools import wraps
117from contextlib import contextmanager
118
119try:
120    # Python 3
121    from itertools import filterfalse
122except ImportError:
123    from itertools import ifilterfalse as filterfalse
124
125try:
126    from _thread import RLock
127except ImportError:
128    from threading import RLock
129
130try:
131    # Python 3
132    from collections.abc import Iterable
133    from collections.abc import MutableMapping, Mapping
134except ImportError:
135    # Python 2.7
136    from collections import Iterable
137    from collections import MutableMapping, Mapping
138
139try:
140    from collections import OrderedDict as _OrderedDict
141except ImportError:
142    try:
143        from ordereddict import OrderedDict as _OrderedDict
144    except ImportError:
145        _OrderedDict = None
146
147try:
148    from types import SimpleNamespace
149except ImportError:
150    class SimpleNamespace: pass
151
152# version compatibility configuration
153__compat__ = SimpleNamespace()
154__compat__.__doc__ = """
155    A cross-version compatibility configuration for pyparsing features that will be
156    released in a future version. By setting values in this configuration to True,
157    those features can be enabled in prior versions for compatibility development
158    and testing.
159
160     - collect_all_And_tokens - flag to enable fix for Issue #63 that fixes erroneous grouping
161       of results names when an And expression is nested within an Or or MatchFirst; set to
162       True to enable bugfix released in pyparsing 2.3.0, or False to preserve
163       pre-2.3.0 handling of named results
164"""
165__compat__.collect_all_And_tokens = True
166
167__diag__ = SimpleNamespace()
168__diag__.__doc__ = """
169Diagnostic configuration (all default to False)
170     - warn_multiple_tokens_in_named_alternation - flag to enable warnings when a results
171       name is defined on a MatchFirst or Or expression with one or more And subexpressions
172       (only warns if __compat__.collect_all_And_tokens is False)
173     - warn_ungrouped_named_tokens_in_collection - flag to enable warnings when a results
174       name is defined on a containing expression with ungrouped subexpressions that also
175       have results names
     - warn_name_set_on_empty_Forward - flag to enable warnings when a Forward is defined
       with a results name, but has no contents defined
     - warn_on_multiple_string_args_to_oneof - flag to enable warnings when oneOf is
       incorrectly called with multiple str arguments
180     - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent
181       calls to ParserElement.setName()
182"""
183__diag__.warn_multiple_tokens_in_named_alternation = False
184__diag__.warn_ungrouped_named_tokens_in_collection = False
185__diag__.warn_name_set_on_empty_Forward = False
186__diag__.warn_on_multiple_string_args_to_oneof = False
187__diag__.enable_debug_on_named_expressions = False
188__diag__._all_names = [nm for nm in vars(__diag__) if nm.startswith("enable_") or nm.startswith("warn_")]
189
def _enable_all_warnings():
    """Set every ``warn_*`` diagnostic flag on ``__diag__`` to True."""
    for flag_name in (
        "warn_multiple_tokens_in_named_alternation",
        "warn_ungrouped_named_tokens_in_collection",
        "warn_name_set_on_empty_Forward",
        "warn_on_multiple_string_args_to_oneof",
    ):
        setattr(__diag__, flag_name, True)


# make the helper callable as __diag__.enable_all_warnings()
__diag__.enable_all_warnings = _enable_all_warnings
196
197
# Names exported by ``from pyparsing import *``: module metadata and configuration
# objects first, then expression classes, then helper functions and constants.
__all__ = ['__version__', '__versionTime__', '__author__', '__compat__', '__diag__',
           'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
           'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
           'PrecededBy', 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
           'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
           'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
           'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
           'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 'Char',
           'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
           'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
           'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
           'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
           'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
           'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
           'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
           'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
           'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
           'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation', 'locatedExpr', 'withClass',
           'CloseMatch', 'tokenMap', 'pyparsing_common', 'pyparsing_unicode', 'unicode_set',
           'conditionAsParseAction', 're',
           ]
219
# Python 2 / Python 3 compatibility shims: depending on the major version this
# section defines _MAX_INT, the string-type aliases, the _ustr() conversion
# helper and the singleArgBuiltins list.
system_version = tuple(sys.version_info)[:3]
PY_3 = system_version[0] == 3
if PY_3:
    _MAX_INT = sys.maxsize
    # Python 2 names used throughout the module, mapped onto their Python 3 equivalents
    basestring = str
    unichr = chr
    unicode = str
    _ustr = str

    # build list of single arg builtins, that can be used as parse actions
    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]

else:
    _MAX_INT = sys.maxint
    # use the lazy xrange under the name range for the rest of the module
    range = xrange

    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode
        friendly.

        A unicode object is returned unchanged. Otherwise str(obj) is
        tried first. If that fails with a UnicodeEncodeError, unicode(obj)
        is encoded with the default encoding using the 'xmlcharrefreplace'
        error handler, and every resulting ``&#NNN;`` character reference
        is rewritten as a backslash-u escape followed by NNN in hex.
        """
        if isinstance(obj, unicode):
            return obj

        try:
            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
            # it won't break any existing code.
            return str(obj)

        except UnicodeEncodeError:
            # Else encode it
            ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            # t[0] is the matched '&#NNN;' text; [2:-1] strips '&#' and ';',
            # and hex(...)[2:] drops the leading '0x'
            xmlcharref = Regex(r'&#\d+;')
            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return xmlcharref.transformString(ret)

    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
    singleArgBuiltins = []
    import __builtin__

    for fname in "sum len sorted reversed list tuple set any all min max".split():
        try:
            singleArgBuiltins.append(getattr(__builtin__, fname))
        except AttributeError:
            # builtin not available in this interpreter version; leave it out
            continue
267
# type object of a generator expression; ParseResults.__init__ checks token lists against it
_generatorType = type((y for y in range(1)))
269
270def _xml_escape(data):
271    """Escape &, <, >, ", ', etc. in a string of data."""
272
273    # ampersand must be replaced first
274    from_symbols = '&><"\''
275    to_symbols = ('&' + s + ';' for s in "amp gt lt quot apos".split())
276    for from_, to_ in zip(from_symbols, to_symbols):
277        data = data.replace(from_, to_)
278    return data
279
# Character-set strings, usable as arguments to expressions such as Word(alphas)
alphas = string.ascii_uppercase + string.ascii_lowercase
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)  # the backslash character
# every character of string.printable that is not whitespace
printables = "".join(c for c in string.printable if c not in string.whitespace)
286
287
def conditionAsParseAction(fn, message=None, fatal=False):
    """Wrap the condition ``fn`` as a parse action that raises when ``fn`` returns a falsy value.

    Parameters:

     - fn - condition callable; it is adapted with ``_trim_arity`` and then
       called as ``fn(s, l, t)``
     - message - text for the raised exception; when None,
       ``"failed user-defined condition"`` is used
     - fatal - if true, raise :class:`ParseFatalException` instead of
       :class:`ParseException`

    Returns the wrapping function; it returns None when the condition holds.
    """
    if message is None:
        msg = "failed user-defined condition"
    else:
        msg = message
    if fatal:
        exc_type = ParseFatalException
    else:
        exc_type = ParseException
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        condition_met = bool(fn(s, l, t))
        if condition_met:
            return
        raise exc_type(s, l, msg)

    return pa
299
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        self.loc = loc
        if msg is not None:
            self.msg = msg
            self.pstr = pstr
        else:
            # single-argument form: the only argument is the message and
            # no input string is recorded
            self.msg = pstr
            self.pstr = ""
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        internal factory method: build an instance of ``cls`` from the input
        string, location, message and parser element of another exception,
        passed positionally to sidestep ``__init__`` signature differences
        among subclasses
        """
        fields = (pe.pstr, pe.loc, pe.msg, pe.parserElement)
        return cls(*fields)

    def __getattr__(self, aname):
        """computed attributes, derived from ``loc`` and ``pstr``:
           - lineno - line number of the exception text
           - col (or column) - column number of the exception text
           - line - the line containing the exception text
           any other name raises AttributeError
        """
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        if aname in ("col", "column"):
            return col(self.loc, self.pstr)
        if aname == "line":
            return line(self.loc, self.pstr)
        raise AttributeError(aname)

    def __str__(self):
        foundstr = ''
        if self.pstr:
            if self.loc < len(self.pstr):
                found_char = self.pstr[self.loc:self.loc + 1]
                foundstr = (', found %r' % found_char).replace(r'\\', '\\')
            else:
                foundstr = ', found end of text'
        return ("%s%s  (at char %d), (line:%d, col:%d)" %
                   (self.msg, foundstr, self.loc, self.lineno, self.column))

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Return the stripped input line on which the exception occurred,
           with ``markerString`` (if non-empty) inserted at the exception column.
        """
        marked = self.line
        split_at = self.column - 1
        if markerString:
            marked = marked[:split_at] + markerString + marked[split_at:]
        return marked.strip()

    def __dir__(self):
        # advertise the computed attributes ahead of the class's own names
        names = "lineno col line".split()
        names.extend(dir(type(self)))
        return names
362
class ParseException(ParseBaseException):
    """
    Exception thrown when parse expressions don't match the input;
    supported attributes by name are:
    - lineno - returns the line number of the exception text
    - col - returns the column number of the exception text
    - line - returns the line containing the exception text

    Example::

        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::

        Expected integer, found 'A'  (at char 0), (line:1, col:1)
        column: 1

    """

    @staticmethod
    def explain(exc, depth=16):
        """
        Method to take an exception and translate the Python internal traceback into a list
        of the pyparsing expressions that caused the exception to be raised.

        Parameters:

         - exc - exception raised during parsing (need not be a ParseException, in support
           of Python exceptions that might be raised in a parse action)
         - depth (default=16) - number of levels back in the stack trace to list expression
           and function names; if None, the full stack trace names will be listed; if 0, only
           the failing input line, marker, and exception string will be shown

        Returns a multi-line string listing the ParserElements and/or function names in the
        exception's stack trace.

        Note: the diagnostic output will include string representations of the expressions
        that failed to parse. These representations will be more helpful if you use `setName` to
        give identifiable names to your expressions. Otherwise they will use the default string
        forms, which may be cryptic to read.

        explain() is only supported under Python 3.
        """
        import inspect

        if depth is None:
            depth = sys.getrecursionlimit()
        ret = []
        if isinstance(exc, ParseBaseException):
            # failing input line, followed by a caret under the failing column
            ret.append(exc.line)
            ret.append(' ' * (exc.col - 1) + '^')
        ret.append("{0}: {1}".format(type(exc).__name__, exc))

        if depth > 0:
            # Only the frame objects are used below. ``context`` is the number
            # of source lines inspect loads per frame, not a stack depth, so
            # ask for none instead of up to ``depth`` lines for every frame.
            callers = inspect.getinnerframes(exc.__traceback__, context=0)
            seen = set()
            for ff in callers[-depth:]:
                frm = ff[0]

                f_self = frm.f_locals.get('self', None)
                if isinstance(f_self, ParserElement):
                    # report each parser element once, and only from its
                    # parseImpl/_parseNoCache frames
                    if frm.f_code.co_name not in ('parseImpl', '_parseNoCache'):
                        continue
                    if f_self in seen:
                        continue
                    seen.add(f_self)

                    self_type = type(f_self)
                    ret.append("{0}.{1} - {2}".format(self_type.__module__,
                                                      self_type.__name__,
                                                      f_self))
                elif f_self is not None:
                    # method of some other object: list its type only
                    self_type = type(f_self)
                    ret.append("{0}.{1}".format(self_type.__module__,
                                                self_type.__name__))
                else:
                    # plain function: list its name, skipping wrapper and module-level frames
                    code = frm.f_code
                    if code.co_name in ('wrapper', '<module>'):
                        continue

                    ret.append("{0}".format(code.co_name))

                # skipped frames (the ``continue`` paths) do not count against depth
                depth -= 1
                if not depth:
                    break

        return '\n'.join(ret)
454
455
class ParseFatalException(ParseBaseException):
    """Exception for user code to raise when inconsistent parse content
    is found; stops all parsing immediately."""
460
class ParseSyntaxException(ParseFatalException):
    """Same as :class:`ParseFatalException`, but raised internally when an
    :class:`ErrorStop<And._ErrorStop>` ('-' operator) indicates that an
    unbacktrackable syntax error has been found and parsing is to stop
    immediately.
    """
468
469#~ class ReparseException(ParseBaseException):
470    #~ """Experimental class - parse actions can raise this exception to cause
471       #~ pyparsing to reparse the input string:
472        #~ - with a modified input string, and/or
473        #~ - with a modified start location
474       #~ Set the values of the ReparseException in the constructor, and raise the
475       #~ exception in a parse action to cause pyparsing to use the new string/location.
476       #~ Setting the values as None causes no change to be made.
477       #~ """
478    #~ def __init_( self, newstring, restartLoc ):
479        #~ self.newParseText = newstring
480        #~ self.reparseLoc = restartLoc
481
class RecursiveGrammarException(Exception):
    """exception thrown by :class:`ParserElement.validate` if the
    grammar could be improperly recursive

    The object passed to the constructor is kept in the
    ``parseElementTrace`` attribute and included in ``str()`` output.
    """
    def __init__(self, parseElementList):
        self.parseElementTrace = parseElementList

    def __str__(self):
        # Wrap the trace in a 1-tuple: a bare tuple on the right-hand side
        # of '%' is unpacked as the format arguments, which raises TypeError
        # (or formats only its single element) instead of showing the trace.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
491
492class _ParseResultsWithOffset(object):
493    def __init__(self, p1, p2):
494        self.tup = (p1, p2)
495    def __getitem__(self, i):
496        return self.tup[i]
497    def __repr__(self):
498        return repr(self.tup[0])
499    def setOffset(self, i):
500        self.tup = (self.tup[0], i)
501
502class ParseResults(object):
503    """Structured parse results, to provide multiple means of access to
504    the parsed data:
505
506       - as a list (``len(results)``)
507       - by list index (``results[0], results[1]``, etc.)
508       - by attribute (``results.<resultsName>`` - see :class:`ParserElement.setResultsName`)
509
510    Example::
511
512        integer = Word(nums)
513        date_str = (integer.setResultsName("year") + '/'
514                        + integer.setResultsName("month") + '/'
515                        + integer.setResultsName("day"))
516        # equivalent form:
517        # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
518
519        # parseString returns a ParseResults object
520        result = date_str.parseString("1999/12/31")
521
522        def test(s, fn=repr):
523            print("%s -> %s" % (s, fn(eval(s))))
524        test("list(result)")
525        test("result[0]")
526        test("result['month']")
527        test("result.day")
528        test("'month' in result")
529        test("'minutes' in result")
530        test("result.dump()", str)
531
532    prints::
533
534        list(result) -> ['1999', '/', '12', '/', '31']
535        result[0] -> '1999'
536        result['month'] -> '12'
537        result.day -> '31'
538        'month' in result -> True
539        'minutes' in result -> False
540        result.dump() -> ['1999', '/', '12', '/', '31']
541        - day: 31
542        - month: 12
543        - year: 1999
544    """
545    def __new__(cls, toklist=None, name=None, asList=True, modal=True):
546        if isinstance(toklist, cls):
547            return toklist
548        retobj = object.__new__(cls)
549        retobj.__doinit = True
550        return retobj
551
552    # Performance tuning: we construct a *lot* of these, so keep this
553    # constructor as small and fast as possible
    def __init__(self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance):
        """Set up the token list and, when ``name`` is given, register the tokens under that name.

        Parameters:

         - toklist - the tokens: a list is copied, a generator is materialized, None becomes
           an empty list, and any other value is wrapped in a one-element list
         - name - results name for the tokens; nothing is registered when it is None or falsy;
           an int name is converted with ``_ustr``
         - asList - when true, the named value is stored as a nested ParseResults; otherwise
           the first token (or ``toklist`` itself if it cannot be indexed) is stored
         - modal - when false, ``name`` is recorded in the accumulating-names dict, which makes
           lookups by that name return all stored values instead of only the last one
         - isinstance - the builtin, bound as a default argument (presumably a local-lookup
           speed-up, in line with the performance note on this constructor)
        """
        # __new__ hands back an existing ParseResults unchanged when one is passed as
        # toklist; in that case __doinit is already False and the state below is kept
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name, int):
                name = _ustr(name)  # will always return a str, but use _ustr for consistency
            self.__name = name
            # skip the registration when the tokens are None, '' or []
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None, '', [])):
                if isinstance(toklist, basestring):
                    toklist = [toklist]
                if asList:
                    if isinstance(toklist, ParseResults):
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist.__toklist), 0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0)
                    # give the nested results object the same name
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError, TypeError, IndexError):
                        # toklist cannot be indexed with 0: store it as it is
                        self[name] = toklist
592
593    def __getitem__(self, i):
594        if isinstance(i, (int, slice)):
595            return self.__toklist[i]
596        else:
597            if i not in self.__accumNames:
598                return self.__tokdict[i][-1][0]
599            else:
600                return ParseResults([v[0] for v in self.__tokdict[i]])
601
602    def __setitem__(self, k, v, isinstance=isinstance):
603        if isinstance(v, _ParseResultsWithOffset):
604            self.__tokdict[k] = self.__tokdict.get(k, list()) + [v]
605            sub = v[0]
606        elif isinstance(k, (int, slice)):
607            self.__toklist[k] = v
608            sub = v
609        else:
610            self.__tokdict[k] = self.__tokdict.get(k, list()) + [_ParseResultsWithOffset(v, 0)]
611            sub = v
612        if isinstance(sub, ParseResults):
613            sub.__parent = wkref(self)
614
615    def __delitem__(self, i):
616        if isinstance(i, (int, slice)):
617            mylen = len(self.__toklist)
618            del self.__toklist[i]
619
620            # convert int to slice
621            if isinstance(i, int):
622                if i < 0:
623                    i += mylen
624                i = slice(i, i + 1)
625            # get removed indices
626            removed = list(range(*i.indices(mylen)))
627            removed.reverse()
628            # fixup indices in token dictionary
629            for name, occurrences in self.__tokdict.items():
630                for j in removed:
631                    for k, (value, position) in enumerate(occurrences):
632                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
633        else:
634            del self.__tokdict[i]
635
636    def __contains__(self, k):
637        return k in self.__tokdict
638
639    def __len__(self):
640        return len(self.__toklist)
641
642    def __bool__(self):
643        return (not not self.__toklist)
644    __nonzero__ = __bool__
645
646    def __iter__(self):
647        return iter(self.__toklist)
648
649    def __reversed__(self):
650        return iter(self.__toklist[::-1])
651
652    def _iterkeys(self):
653        if hasattr(self.__tokdict, "iterkeys"):
654            return self.__tokdict.iterkeys()
655        else:
656            return iter(self.__tokdict)
657
658    def _itervalues(self):
659        return (self[k] for k in self._iterkeys())
660
661    def _iteritems(self):
662        return ((k, self[k]) for k in self._iterkeys())
663
    # Bind the dict-style accessors according to the Python major version:
    # on Python 3 keys/values/items are the iterator-returning helpers; on
    # Python 2 those helpers are exposed as iterkeys/itervalues/iteritems
    # and keys/values/items return lists.
    if PY_3:
        keys = _iterkeys
        """Returns an iterator of all named result keys."""

        values = _itervalues
        """Returns an iterator of all named result values."""

        items = _iteritems
        """Returns an iterator of all named result key-value tuples."""

    else:
        iterkeys = _iterkeys
        """Returns an iterator of all named result keys (Python 2.x only)."""

        itervalues = _itervalues
        """Returns an iterator of all named result values (Python 2.x only)."""

        iteritems = _iteritems
        """Returns an iterator of all named result key-value tuples (Python 2.x only)."""

        def keys(self):
            """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iterkeys())

        def values(self):
            """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.itervalues())

        def items(self):
            """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iteritems())
695
696    def haskeys(self):
697        """Since keys() returns an iterator, this method is helpful in bypassing
698           code that looks for the existence of any defined results names."""
699        return bool(self.__tokdict)
700
701    def pop(self, *args, **kwargs):
702        """
703        Removes and returns item at specified index (default= ``last``).
704        Supports both ``list`` and ``dict`` semantics for ``pop()``. If
705        passed no argument or an integer argument, it will use ``list``
706        semantics and pop tokens from the list of parsed tokens. If passed
707        a non-integer argument (most likely a string), it will use ``dict``
708        semantics and pop the corresponding value from any defined results
709        names. A second default return value argument is supported, just as in
710        ``dict.pop()``.
711
712        Example::
713
714            def remove_first(tokens):
715                tokens.pop(0)
716            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
717            print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
718
719            label = Word(alphas)
720            patt = label("LABEL") + OneOrMore(Word(nums))
721            print(patt.parseString("AAB 123 321").dump())
722
723            # Use pop() in a parse action to remove named result (note that corresponding value is not
724            # removed from list form of results)
725            def remove_LABEL(tokens):
726                tokens.pop("LABEL")
727                return tokens
728            patt.addParseAction(remove_LABEL)
729            print(patt.parseString("AAB 123 321").dump())
730
731        prints::
732
733            ['AAB', '123', '321']
734            - LABEL: AAB
735
736            ['AAB', '123', '321']
737        """
738        if not args:
739            args = [-1]
740        for k, v in kwargs.items():
741            if k == 'default':
742                args = (args[0], v)
743            else:
744                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
745        if (isinstance(args[0], int)
746                or len(args) == 1
747                or args[0] in self):
748            index = args[0]
749            ret = self[index]
750            del self[index]
751            return ret
752        else:
753            defaultvalue = args[1]
754            return defaultvalue
755
756    def get(self, key, defaultValue=None):
757        """
758        Returns named result matching the given key, or if there is no
759        such name, then returns the given ``defaultValue`` or ``None`` if no
760        ``defaultValue`` is specified.
761
762        Similar to ``dict.get()``.
763
764        Example::
765
766            integer = Word(nums)
767            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
768
769            result = date_str.parseString("1999/12/31")
770            print(result.get("year")) # -> '1999'
771            print(result.get("hour", "not specified")) # -> 'not specified'
772            print(result.get("hour")) # -> None
773        """
774        if key in self:
775            return self[key]
776        else:
777            return defaultValue
778
779    def insert(self, index, insStr):
780        """
781        Inserts new element at location index in the list of parsed tokens.
782
783        Similar to ``list.insert()``.
784
785        Example::
786
787            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
788
789            # use a parse action to insert the parse location in the front of the parsed results
790            def insert_locn(locn, tokens):
791                tokens.insert(0, locn)
792            print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
793        """
794        self.__toklist.insert(index, insStr)
795        # fixup indices in token dictionary
796        for name, occurrences in self.__tokdict.items():
797            for k, (value, position) in enumerate(occurrences):
798                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
799
800    def append(self, item):
801        """
802        Add single element to end of ParseResults list of elements.
803
804        Example::
805
806            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
807
808            # use a parse action to compute the sum of the parsed integers, and add it to the end
809            def append_sum(tokens):
810                tokens.append(sum(map(int, tokens)))
811            print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
812        """
813        self.__toklist.append(item)
814
815    def extend(self, itemseq):
816        """
817        Add sequence of elements to end of ParseResults list of elements.
818
819        Example::
820
821            patt = OneOrMore(Word(alphas))
822
823            # use a parse action to append the reverse of the matched strings, to make a palindrome
824            def make_palindrome(tokens):
825                tokens.extend(reversed([t[::-1] for t in tokens]))
826                return ''.join(tokens)
827            print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
828        """
829        if isinstance(itemseq, ParseResults):
830            self.__iadd__(itemseq)
831        else:
832            self.__toklist.extend(itemseq)
833
834    def clear(self):
835        """
836        Clear all elements and results names.
837        """
838        del self.__toklist[:]
839        self.__tokdict.clear()
840
841    def __getattr__(self, name):
842        try:
843            return self[name]
844        except KeyError:
845            return ""
846
847    def __add__(self, other):
848        ret = self.copy()
849        ret += other
850        return ret
851
852    def __iadd__(self, other):
853        if other.__tokdict:
854            offset = len(self.__toklist)
855            addoffset = lambda a: offset if a < 0 else a + offset
856            otheritems = other.__tokdict.items()
857            otherdictitems = [(k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
858                              for k, vlist in otheritems for v in vlist]
859            for k, v in otherdictitems:
860                self[k] = v
861                if isinstance(v[0], ParseResults):
862                    v[0].__parent = wkref(self)
863
864        self.__toklist += other.__toklist
865        self.__accumNames.update(other.__accumNames)
866        return self
867
868    def __radd__(self, other):
869        if isinstance(other, int) and other == 0:
870            # useful for merging many ParseResults using sum() builtin
871            return self.copy()
872        else:
873            # this may raise a TypeError - so be it
874            return other + self
875
876    def __repr__(self):
877        return "(%s, %s)" % (repr(self.__toklist), repr(self.__tokdict))
878
879    def __str__(self):
880        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
881
882    def _asStringList(self, sep=''):
883        out = []
884        for item in self.__toklist:
885            if out and sep:
886                out.append(sep)
887            if isinstance(item, ParseResults):
888                out += item._asStringList()
889            else:
890                out.append(_ustr(item))
891        return out
892
893    def asList(self):
894        """
895        Returns the parse results as a nested list of matching tokens, all converted to strings.
896
897        Example::
898
899            patt = OneOrMore(Word(alphas))
900            result = patt.parseString("sldkj lsdkj sldkj")
901            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
902            print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
903
904            # Use asList() to create an actual list
905            result_list = result.asList()
906            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
907        """
908        return [res.asList() if isinstance(res, ParseResults) else res for res in self.__toklist]
909
910    def asDict(self):
911        """
912        Returns the named parse results as a nested dictionary.
913
914        Example::
915
916            integer = Word(nums)
917            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
918
919            result = date_str.parseString('12/31/1999')
920            print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
921
922            result_dict = result.asDict()
923            print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
924
925            # even though a ParseResults supports dict-like access, sometime you just need to have a dict
926            import json
927            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
928            print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
929        """
930        if PY_3:
931            item_fn = self.items
932        else:
933            item_fn = self.iteritems
934
935        def toItem(obj):
936            if isinstance(obj, ParseResults):
937                if obj.haskeys():
938                    return obj.asDict()
939                else:
940                    return [toItem(v) for v in obj]
941            else:
942                return obj
943
944        return dict((k, toItem(v)) for k, v in item_fn())
945
946    def copy(self):
947        """
948        Returns a new copy of a :class:`ParseResults` object.
949        """
950        ret = ParseResults(self.__toklist)
951        ret.__tokdict = dict(self.__tokdict.items())
952        ret.__parent = self.__parent
953        ret.__accumNames.update(self.__accumNames)
954        ret.__name = self.__name
955        return ret
956
957    def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
958        """
959        (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
960        """
961        nl = "\n"
962        out = []
963        namedItems = dict((v[1], k) for (k, vlist) in self.__tokdict.items()
964                          for v in vlist)
965        nextLevelIndent = indent + "  "
966
967        # collapse out indents if formatting is not desired
968        if not formatted:
969            indent = ""
970            nextLevelIndent = ""
971            nl = ""
972
973        selfTag = None
974        if doctag is not None:
975            selfTag = doctag
976        else:
977            if self.__name:
978                selfTag = self.__name
979
980        if not selfTag:
981            if namedItemsOnly:
982                return ""
983            else:
984                selfTag = "ITEM"
985
986        out += [nl, indent, "<", selfTag, ">"]
987
988        for i, res in enumerate(self.__toklist):
989            if isinstance(res, ParseResults):
990                if i in namedItems:
991                    out += [res.asXML(namedItems[i],
992                                      namedItemsOnly and doctag is None,
993                                      nextLevelIndent,
994                                      formatted)]
995                else:
996                    out += [res.asXML(None,
997                                      namedItemsOnly and doctag is None,
998                                      nextLevelIndent,
999                                      formatted)]
1000            else:
1001                # individual token, see if there is a name for it
1002                resTag = None
1003                if i in namedItems:
1004                    resTag = namedItems[i]
1005                if not resTag:
1006                    if namedItemsOnly:
1007                        continue
1008                    else:
1009                        resTag = "ITEM"
1010                xmlBodyText = _xml_escape(_ustr(res))
1011                out += [nl, nextLevelIndent, "<", resTag, ">",
1012                        xmlBodyText,
1013                                                "</", resTag, ">"]
1014
1015        out += [nl, indent, "</", selfTag, ">"]
1016        return "".join(out)
1017
1018    def __lookup(self, sub):
1019        for k, vlist in self.__tokdict.items():
1020            for v, loc in vlist:
1021                if sub is v:
1022                    return k
1023        return None
1024
1025    def getName(self):
1026        r"""
1027        Returns the results name for this token expression. Useful when several
1028        different expressions might match at a particular location.
1029
1030        Example::
1031
1032            integer = Word(nums)
1033            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
1034            house_number_expr = Suppress('#') + Word(nums, alphanums)
1035            user_data = (Group(house_number_expr)("house_number")
1036                        | Group(ssn_expr)("ssn")
1037                        | Group(integer)("age"))
1038            user_info = OneOrMore(user_data)
1039
1040            result = user_info.parseString("22 111-22-3333 #221B")
1041            for item in result:
1042                print(item.getName(), ':', item[0])
1043
1044        prints::
1045
1046            age : 22
1047            ssn : 111-22-3333
1048            house_number : 221B
1049        """
1050        if self.__name:
1051            return self.__name
1052        elif self.__parent:
1053            par = self.__parent()
1054            if par:
1055                return par.__lookup(self)
1056            else:
1057                return None
1058        elif (len(self) == 1
1059              and len(self.__tokdict) == 1
1060              and next(iter(self.__tokdict.values()))[0][1] in (0, -1)):
1061            return next(iter(self.__tokdict.keys()))
1062        else:
1063            return None
1064
1065    def dump(self, indent='', full=True, include_list=True, _depth=0):
1066        """
1067        Diagnostic method for listing out the contents of
1068        a :class:`ParseResults`. Accepts an optional ``indent`` argument so
1069        that this string can be embedded in a nested display of other data.
1070
1071        Example::
1072
1073            integer = Word(nums)
1074            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1075
1076            result = date_str.parseString('12/31/1999')
1077            print(result.dump())
1078
1079        prints::
1080
1081            ['12', '/', '31', '/', '1999']
1082            - day: 1999
1083            - month: 31
1084            - year: 12
1085        """
1086        out = []
1087        NL = '\n'
1088        if include_list:
1089            out.append(indent + _ustr(self.asList()))
1090        else:
1091            out.append('')
1092
1093        if full:
1094            if self.haskeys():
1095                items = sorted((str(k), v) for k, v in self.items())
1096                for k, v in items:
1097                    if out:
1098                        out.append(NL)
1099                    out.append("%s%s- %s: " % (indent, ('  ' * _depth), k))
1100                    if isinstance(v, ParseResults):
1101                        if v:
1102                            out.append(v.dump(indent=indent, full=full, include_list=include_list, _depth=_depth + 1))
1103                        else:
1104                            out.append(_ustr(v))
1105                    else:
1106                        out.append(repr(v))
1107            elif any(isinstance(vv, ParseResults) for vv in self):
1108                v = self
1109                for i, vv in enumerate(v):
1110                    if isinstance(vv, ParseResults):
1111                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,
1112                                                            ('  ' * (_depth)),
1113                                                            i,
1114                                                            indent,
1115                                                            ('  ' * (_depth + 1)),
1116                                                            vv.dump(indent=indent,
1117                                                                    full=full,
1118                                                                    include_list=include_list,
1119                                                                    _depth=_depth + 1)))
1120                    else:
1121                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,
1122                                                            ('  ' * (_depth)),
1123                                                            i,
1124                                                            indent,
1125                                                            ('  ' * (_depth + 1)),
1126                                                            _ustr(vv)))
1127
1128        return "".join(out)
1129
1130    def pprint(self, *args, **kwargs):
1131        """
1132        Pretty-printer for parsed results as a list, using the
1133        `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
1134        Accepts additional positional or keyword args as defined for
1135        `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .
1136
1137        Example::
1138
1139            ident = Word(alphas, alphanums)
1140            num = Word(nums)
1141            func = Forward()
1142            term = ident | num | Group('(' + func + ')')
1143            func <<= ident + Group(Optional(delimitedList(term)))
1144            result = func.parseString("fna a,b,(fnb c,d,200),100")
1145            result.pprint(width=40)
1146
1147        prints::
1148
1149            ['fna',
1150             ['a',
1151              'b',
1152              ['(', 'fnb', ['c', 'd', '200'], ')'],
1153              '100']]
1154        """
1155        pprint.pprint(self.asList(), *args, **kwargs)
1156
1157    # add support for pickle protocol
1158    def __getstate__(self):
1159        return (self.__toklist,
1160                (self.__tokdict.copy(),
1161                 self.__parent is not None and self.__parent() or None,
1162                 self.__accumNames,
1163                 self.__name))
1164
1165    def __setstate__(self, state):
1166        self.__toklist = state[0]
1167        self.__tokdict, par, inAccumNames, self.__name = state[1]
1168        self.__accumNames = {}
1169        self.__accumNames.update(inAccumNames)
1170        if par is not None:
1171            self.__parent = wkref(par)
1172        else:
1173            self.__parent = None
1174
1175    def __getnewargs__(self):
1176        return self.__toklist, self.__name, self.__asList, self.__modal
1177
1178    def __dir__(self):
1179        return dir(type(self)) + list(self.keys())
1180
1181    @classmethod
1182    def from_dict(cls, other, name=None):
1183        """
1184        Helper classmethod to construct a ParseResults from a dict, preserving the
1185        name-value relations as results names. If an optional 'name' argument is
1186        given, a nested ParseResults will be returned
1187        """
1188        def is_iterable(obj):
1189            try:
1190                iter(obj)
1191            except Exception:
1192                return False
1193            else:
1194                if PY_3:
1195                    return not isinstance(obj, (str, bytes))
1196                else:
1197                    return not isinstance(obj, basestring)
1198
1199        ret = cls([])
1200        for k, v in other.items():
1201            if isinstance(v, Mapping):
1202                ret += cls.from_dict(v, name=k)
1203            else:
1204                ret += cls([v], name=k, asList=is_iterable(v))
1205        if name is not None:
1206            ret = cls([ret], name=name)
1207        return ret
1208
# Register ParseResults with MutableMapping instead of inheriting from it.
# NOTE(review): this presumably relies on MutableMapping being the collections
# ABC imported at the top of the file, whose register() makes ParseResults a
# virtual subclass for isinstance()/issubclass() checks - confirm against the imports.
MutableMapping.register(ParseResults)
1210
def col(loc, strg):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    Parameters:
    - loc - index into ``strg``
    - strg - the string being parsed

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See
    :class:`ParserElement.parseString` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    # a location directly after a newline (and inside the string) is column 1
    if 0 < loc < len(strg) and strg[loc - 1] == '\n':
        return 1
    # otherwise: distance from the last newline before loc (rfind gives -1
    # when there is none, which makes the first line 1-based as well)
    return loc - strg.rfind("\n", 0, loc)
1224
def lineno(loc, strg):
    """Returns current line number within a string, counting newlines as line separators.
    The first line is number 1.

    The result is one more than the number of newlines found in
    ``strg`` before index ``loc``.

    Note - the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See :class:`ParserElement.parseString`
    for more information on parsing strings containing ``<TAB>`` s, and
    suggested methods to maintain a consistent view of the parsed string, the
    parse location, and line and column positions within the parsed string.
    """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
1236
def line(loc, strg):
    """Returns the line of text containing loc within a string, counting newlines as line separators.

    The returned text excludes the newline characters that delimit it.
    """
    # start just after the last newline before loc (index 0 if there is none)
    start = strg.rfind("\n", 0, loc) + 1
    # stop at the first newline at or after loc, or run to the end of the string
    end = strg.find("\n", loc)
    return strg[start:end] if end >= 0 else strg[start:]
1246
def _defaultStartDebugAction(instring, loc, expr):
    # default debug action announcing a match attempt: prints the expression,
    # the location, and the location's (line, column)
    message = "Match %s at loc %s(%d,%d)" % (_ustr(expr),
                                             _ustr(loc),
                                             lineno(loc, instring),
                                             col(loc, instring))
    print(message)
1249
def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks):
    # default debug action for a successful match: prints the expression and
    # the list form of the matched tokens
    message = "Matched %s -> %s" % (_ustr(expr), str(toks.asList()))
    print(message)
1252
def _defaultExceptionDebugAction(instring, loc, expr, exc):
    # default debug action for a failed match: prints the exception text
    message = "Exception raised:%s" % (_ustr(exc),)
    print(message)
1255
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments, ignores them, and returns ``None``.
    """
    return None
1259
1260# Only works on Python 3.x - nonlocal is toxic to Python 2 installs
1261#~ 'decorator to trim function calls to match the arity of the target'
1262#~ def _trim_arity(func, maxargs=3):
1263    #~ if func in singleArgBuiltins:
1264        #~ return lambda s,l,t: func(t)
1265    #~ limit = 0
1266    #~ foundArity = False
1267    #~ def wrapper(*args):
1268        #~ nonlocal limit,foundArity
1269        #~ while 1:
1270            #~ try:
1271                #~ ret = func(*args[limit:])
1272                #~ foundArity = True
1273                #~ return ret
1274            #~ except TypeError:
1275                #~ if limit == maxargs or foundArity:
1276                    #~ raise
1277                #~ limit += 1
1278                #~ continue
1279    #~ return wrapper
1280
1281# this version is Python 2.x-3.x cross-compatible
1282'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """
    Wrap ``func`` so that it can be called with a full argument list even
    though it may accept fewer positional arguments.

    The returned ``wrapper`` calls ``func(*args[limit:])``, starting with no
    arguments dropped.  When that call itself raises ``TypeError``, one more
    leading argument is dropped and the call is retried; once a call has
    succeeded, later ``TypeError`` s are re-raised unchanged.

    Parameters:
    - func - the callable to wrap; a member of ``singleArgBuiltins`` is instead wrapped in a lambda that passes only the third argument
    - maxargs - bound for the retry counter: another leading argument is dropped only while the counter is ``<= maxargs``

    Returns the wrapping callable; its ``__name__`` is taken from ``func``
    (or from the name of ``func``'s class) when that can be determined.
    """
    if func in singleArgBuiltins:
        return lambda s, l, t: func(t)
    # one-element lists act as mutable cells that wrapper() can update
    # without needing ``nonlocal`` (see the Python 3-only variant in the comment above)
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3, 5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3, 5, 0) else -2
            frame_summary = traceback.extract_stack(limit=-offset + limit - 1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1] + LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # compare the first two fields of the last extracted traceback
                        # entry with the synthesized location of the func(...) call
                        # above; a mismatch is not treated as an arity failure
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the local reference to the traceback object
                        try:
                            del tb
                        except NameError:
                            pass

                # retry with one more leading argument dropped, within the bound
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
1349
1350
1351class ParserElement(object):
1352    """Abstract base level parser element class."""
1353    DEFAULT_WHITE_CHARS = " \n\t\r"
1354    verbose_stacktrace = False
1355
1356    @staticmethod
1357    def setDefaultWhitespaceChars(chars):
1358        r"""
1359        Overrides the default whitespace chars
1360
1361        Example::
1362
1363            # default whitespace chars are space, <TAB> and newline
1364            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']
1365
1366            # change to just treat newline as significant
1367            ParserElement.setDefaultWhitespaceChars(" \t")
1368            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
1369        """
1370        ParserElement.DEFAULT_WHITE_CHARS = chars
1371
1372    @staticmethod
1373    def inlineLiteralsUsing(cls):
1374        """
1375        Set class to be used for inclusion of string literals into a parser.
1376
1377        Example::
1378
1379            # default literal class used is Literal
1380            integer = Word(nums)
1381            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1382
1383            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
1384
1385
1386            # change to Suppress
1387            ParserElement.inlineLiteralsUsing(Suppress)
1388            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1389
1390            date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
1391        """
1392        ParserElement._literalStringClass = cls
1393
1394    @classmethod
1395    def _trim_traceback(cls, tb):
1396        while tb.tb_next:
1397            tb = tb.tb_next
1398        return tb
1399
1400    def __init__(self, savelist=False):
1401        self.parseAction = list()
1402        self.failAction = None
1403        # ~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
1404        self.strRepr = None
1405        self.resultsName = None
1406        self.saveAsList = savelist
1407        self.skipWhitespace = True
1408        self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
1409        self.copyDefaultWhiteChars = True
1410        self.mayReturnEmpty = False # used when checking for left-recursion
1411        self.keepTabs = False
1412        self.ignoreExprs = list()
1413        self.debug = False
1414        self.streamlined = False
1415        self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
1416        self.errmsg = ""
1417        self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
1418        self.debugActions = (None, None, None)  # custom debug actions
1419        self.re = None
1420        self.callPreparse = True # used to avoid redundant calls to preParse
1421        self.callDuringTry = False
1422
1423    def copy(self):
1424        """
1425        Make a copy of this :class:`ParserElement`.  Useful for defining
1426        different parse actions for the same parsing pattern, using copies of
1427        the original parse element.
1428
1429        Example::
1430
1431            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1432            integerK = integer.copy().addParseAction(lambda toks: toks[0] * 1024) + Suppress("K")
1433            integerM = integer.copy().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
1434
1435            print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
1436
1437        prints::
1438
1439            [5120, 100, 655360, 268435456]
1440
1441        Equivalent form of ``expr.copy()`` is just ``expr()``::
1442
1443            integerM = integer().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
1444        """
1445        cpy = copy.copy(self)
1446        cpy.parseAction = self.parseAction[:]
1447        cpy.ignoreExprs = self.ignoreExprs[:]
1448        if self.copyDefaultWhiteChars:
1449            cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1450        return cpy
1451
1452    def setName(self, name):
1453        """
1454        Define name for this expression, makes debugging and exception messages clearer.
1455
1456        Example::
1457
1458            Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
1459            Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
1460        """
1461        self.name = name
1462        self.errmsg = "Expected " + self.name
1463        if __diag__.enable_debug_on_named_expressions:
1464            self.setDebug()
1465        return self
1466
1467    def setResultsName(self, name, listAllMatches=False):
1468        """
1469        Define name for referencing matching tokens as a nested attribute
1470        of the returned parse results.
1471        NOTE: this returns a *copy* of the original :class:`ParserElement` object;
1472        this is so that the client can define a basic element, such as an
1473        integer, and reference it in multiple places with different names.
1474
1475        You can also set results names using the abbreviated syntax,
1476        ``expr("name")`` in place of ``expr.setResultsName("name")``
1477        - see :class:`__call__`.
1478
1479        Example::
1480
1481            date_str = (integer.setResultsName("year") + '/'
1482                        + integer.setResultsName("month") + '/'
1483                        + integer.setResultsName("day"))
1484
1485            # equivalent form:
1486            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1487        """
1488        return self._setResultsName(name, listAllMatches)
1489
1490    def _setResultsName(self, name, listAllMatches=False):
1491        newself = self.copy()
1492        if name.endswith("*"):
1493            name = name[:-1]
1494            listAllMatches = True
1495        newself.resultsName = name
1496        newself.modalResults = not listAllMatches
1497        return newself
1498
1499    def setBreak(self, breakFlag=True):
1500        """Method to invoke the Python pdb debugger when this element is
1501           about to be parsed. Set ``breakFlag`` to True to enable, False to
1502           disable.
1503        """
1504        if breakFlag:
1505            _parseMethod = self._parse
1506            def breaker(instring, loc, doActions=True, callPreParse=True):
1507                import pdb
1508                # this call to pdb.set_trace() is intentional, not a checkin error
1509                pdb.set_trace()
1510                return _parseMethod(instring, loc, doActions, callPreParse)
1511            breaker._originalParseMethod = _parseMethod
1512            self._parse = breaker
1513        else:
1514            if hasattr(self._parse, "_originalParseMethod"):
1515                self._parse = self._parse._originalParseMethod
1516        return self
1517
1518    def setParseAction(self, *fns, **kwargs):
1519        """
1520        Define one or more actions to perform when successfully matching parse element definition.
1521        Parse action fn is a callable method with 0-3 arguments, called as ``fn(s, loc, toks)`` ,
1522        ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:
1523
1524        - s   = the original string being parsed (see note below)
1525        - loc = the location of the matching substring
1526        - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object
1527
1528        If the functions in fns modify the tokens, they can return them as the return
1529        value from fn, and the modified list of tokens will replace the original.
1530        Otherwise, fn does not need to return any value.
1531
1532        If None is passed as the parse action, all previously added parse actions for this
1533        expression are cleared.
1534
1535        Optional keyword arguments:
1536        - callDuringTry = (default= ``False``) indicate if parse action should be run during lookaheads and alternate testing
1537
1538        Note: the default parsing behavior is to expand tabs in the input string
1539        before starting the parsing process.  See :class:`parseString for more
1540        information on parsing strings containing ``<TAB>`` s, and suggested
1541        methods to maintain a consistent view of the parsed string, the parse
1542        location, and line and column positions within the parsed string.
1543
1544        Example::
1545
1546            integer = Word(nums)
1547            date_str = integer + '/' + integer + '/' + integer
1548
1549            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
1550
1551            # use parse action to convert to ints at parse time
1552            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1553            date_str = integer + '/' + integer + '/' + integer
1554
1555            # note that integer fields are now ints, not strings
1556            date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
1557        """
1558        if list(fns) == [None,]:
1559            self.parseAction = []
1560        else:
1561            if not all(callable(fn) for fn in fns):
1562                raise TypeError("parse actions must be callable")
1563            self.parseAction = list(map(_trim_arity, list(fns)))
1564            self.callDuringTry = kwargs.get("callDuringTry", False)
1565        return self
1566
1567    def addParseAction(self, *fns, **kwargs):
1568        """
1569        Add one or more parse actions to expression's list of parse actions. See :class:`setParseAction`.
1570
1571        See examples in :class:`copy`.
1572        """
1573        self.parseAction += list(map(_trim_arity, list(fns)))
1574        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
1575        return self
1576
1577    def addCondition(self, *fns, **kwargs):
1578        """Add a boolean predicate function to expression's list of parse actions. See
1579        :class:`setParseAction` for function call signatures. Unlike ``setParseAction``,
1580        functions passed to ``addCondition`` need to return boolean success/fail of the condition.
1581
1582        Optional keyword arguments:
1583        - message = define a custom message to be used in the raised exception
1584        - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
1585
1586        Example::
1587
1588            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1589            year_int = integer.copy()
1590            year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
1591            date_str = year_int + '/' + integer + '/' + integer
1592
1593            result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
1594        """
1595        for fn in fns:
1596            self.parseAction.append(conditionAsParseAction(fn, message=kwargs.get('message'),
1597                                                           fatal=kwargs.get('fatal', False)))
1598
1599        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
1600        return self
1601
1602    def setFailAction(self, fn):
1603        """Define action to perform if parsing fails at this expression.
1604           Fail acton fn is a callable function that takes the arguments
1605           ``fn(s, loc, expr, err)`` where:
1606           - s = string being parsed
1607           - loc = location where expression match was attempted and failed
1608           - expr = the parse expression that failed
1609           - err = the exception thrown
1610           The function returns no value.  It may throw :class:`ParseFatalException`
1611           if it is desired to stop parsing immediately."""
1612        self.failAction = fn
1613        return self
1614
1615    def _skipIgnorables(self, instring, loc):
1616        exprsFound = True
1617        while exprsFound:
1618            exprsFound = False
1619            for e in self.ignoreExprs:
1620                try:
1621                    while 1:
1622                        loc, dummy = e._parse(instring, loc)
1623                        exprsFound = True
1624                except ParseException:
1625                    pass
1626        return loc
1627
1628    def preParse(self, instring, loc):
1629        if self.ignoreExprs:
1630            loc = self._skipIgnorables(instring, loc)
1631
1632        if self.skipWhitespace:
1633            wt = self.whiteChars
1634            instrlen = len(instring)
1635            while loc < instrlen and instring[loc] in wt:
1636                loc += 1
1637
1638        return loc
1639
1640    def parseImpl(self, instring, loc, doActions=True):
1641        return loc, []
1642
1643    def postParse(self, instring, loc, tokenlist):
1644        return tokenlist
1645
1646    # ~ @profile
    def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
        """
        Match this expression against ``instring`` starting at ``loc``, without
        consulting the packrat cache.

        Steps performed:

        - run ``preParse`` (only if both ``callPreParse`` and ``self.callPreparse`` are set)
        - run ``parseImpl`` and then ``postParse``
        - wrap the tokens in a :class:`ParseResults`
        - run the parse actions, if ``doActions`` or ``self.callDuringTry`` is set;
          a non-None return value that is not the current results object replaces the results

        Parameters:

        - instring - the string being parsed
        - loc - location at which to attempt the match
        - doActions - (default= ``True``) whether parse actions should be run
        - callPreParse - (default= ``True``) whether ``preParse`` may be called first

        Returns a ``(loc, tokens)`` tuple: the location following the match and the
        resulting :class:`ParseResults`.

        An ``IndexError`` raised by ``parseImpl`` (guarded only when ``self.mayIndexError``
        is set or the start location is at/after the end of the string) is re-raised as a
        :class:`ParseException` located at the end of the string.  An ``IndexError`` raised
        by a parse action is re-raised as a :class:`ParseException` whose ``__cause__`` is
        the original error.

        When ``self.debug`` or ``self.failAction`` is set, the slower instrumented path is
        taken: the TRY / MATCH / FAIL entries of ``self.debugActions`` and the fail action
        are invoked around the match attempt, and any exception is re-raised afterwards.
        """
        # indexes into self.debugActions
        TRY, MATCH, FAIL = 0, 1, 2
        debugging = (self.debug)  # and doActions)

        # The matching code below appears twice: once wrapped with debug/fail hooks,
        # once bare for the common case where neither is configured.
        if debugging or self.failAction:
            # ~ print ("Match", self, "at loc", loc, "(%d, %d)" % (lineno(loc, instring), col(loc, instring)))
            if self.debugActions[TRY]:
                self.debugActions[TRY](instring, loc, self)
            try:
                if callPreParse and self.callPreparse:
                    preloc = self.preParse(instring, loc)
                else:
                    preloc = loc
                # location handed to debug, fail and parse actions as the start of the match
                tokensStart = preloc
                if self.mayIndexError or preloc >= len(instring):
                    try:
                        loc, tokens = self.parseImpl(instring, preloc, doActions)
                    except IndexError:
                        # running off the end of the input is reported as a normal parse failure
                        raise ParseException(instring, len(instring), self.errmsg, self)
                else:
                    loc, tokens = self.parseImpl(instring, preloc, doActions)
            except Exception as err:
                # ~ print ("Exception raised:", err)
                # NOTE(review): tokensStart is only bound after preParse returns, so an exception
                # raised by preParse itself would reach this handler with tokensStart unbound
                if self.debugActions[FAIL]:
                    self.debugActions[FAIL](instring, tokensStart, self, err)
                if self.failAction:
                    self.failAction(instring, tokensStart, self, err)
                raise
        else:
            # uninstrumented path: same steps as above without any debug/fail callbacks
            if callPreParse and self.callPreparse:
                preloc = self.preParse(instring, loc)
            else:
                preloc = loc
            tokensStart = preloc
            if self.mayIndexError or preloc >= len(instring):
                try:
                    loc, tokens = self.parseImpl(instring, preloc, doActions)
                except IndexError:
                    raise ParseException(instring, len(instring), self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, preloc, doActions)

        tokens = self.postParse(instring, loc, tokens)

        retTokens = ParseResults(tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults)
        # parse actions also run during lookahead (doActions false) when callDuringTry was requested
        if self.parseAction and (doActions or self.callDuringTry):
            if debugging:
                try:
                    for fn in self.parseAction:
                        try:
                            tokens = fn(instring, tokensStart, retTokens)
                        except IndexError as parse_action_exc:
                            # surface an IndexError from user code as a parse failure, keeping the cause
                            exc = ParseException("exception raised in parse action")
                            exc.__cause__ = parse_action_exc
                            raise exc

                        # a returned value (other than the same results object) replaces the results
                        if tokens is not None and tokens is not retTokens:
                            retTokens = ParseResults(tokens,
                                                      self.resultsName,
                                                      asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                                                      modal=self.modalResults)
                except Exception as err:
                    # ~ print "Exception raised in user parse action:", err
                    if self.debugActions[FAIL]:
                        self.debugActions[FAIL](instring, tokensStart, self, err)
                    raise
            else:
                # same loop as above, without the debug FAIL callback
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokensStart, retTokens)
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        exc.__cause__ = parse_action_exc
                        raise exc

                    if tokens is not None and tokens is not retTokens:
                        retTokens = ParseResults(tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                                                  modal=self.modalResults)
        if debugging:
            # ~ print ("Matched", self, "->", retTokens.asList())
            if self.debugActions[MATCH]:
                self.debugActions[MATCH](instring, tokensStart, loc, self, retTokens)

        return loc, retTokens
1733
1734    def tryParse(self, instring, loc):
1735        try:
1736            return self._parse(instring, loc, doActions=False)[0]
1737        except ParseFatalException:
1738            raise ParseException(instring, loc, self.errmsg, self)
1739
1740    def canParseNext(self, instring, loc):
1741        try:
1742            self.tryParse(instring, loc)
1743        except (ParseException, IndexError):
1744            return False
1745        else:
1746            return True
1747
1748    class _UnboundedCache(object):
1749        def __init__(self):
1750            cache = {}
1751            self.not_in_cache = not_in_cache = object()
1752
1753            def get(self, key):
1754                return cache.get(key, not_in_cache)
1755
1756            def set(self, key, value):
1757                cache[key] = value
1758
1759            def clear(self):
1760                cache.clear()
1761
1762            def cache_len(self):
1763                return len(cache)
1764
1765            self.get = types.MethodType(get, self)
1766            self.set = types.MethodType(set, self)
1767            self.clear = types.MethodType(clear, self)
1768            self.__len__ = types.MethodType(cache_len, self)
1769
1770    if _OrderedDict is not None:
1771        class _FifoCache(object):
1772            def __init__(self, size):
1773                self.not_in_cache = not_in_cache = object()
1774
1775                cache = _OrderedDict()
1776
1777                def get(self, key):
1778                    return cache.get(key, not_in_cache)
1779
1780                def set(self, key, value):
1781                    cache[key] = value
1782                    while len(cache) > size:
1783                        try:
1784                            cache.popitem(False)
1785                        except KeyError:
1786                            pass
1787
1788                def clear(self):
1789                    cache.clear()
1790
1791                def cache_len(self):
1792                    return len(cache)
1793
1794                self.get = types.MethodType(get, self)
1795                self.set = types.MethodType(set, self)
1796                self.clear = types.MethodType(clear, self)
1797                self.__len__ = types.MethodType(cache_len, self)
1798
1799    else:
1800        class _FifoCache(object):
1801            def __init__(self, size):
1802                self.not_in_cache = not_in_cache = object()
1803
1804                cache = {}
1805                key_fifo = collections.deque([], size)
1806
1807                def get(self, key):
1808                    return cache.get(key, not_in_cache)
1809
1810                def set(self, key, value):
1811                    cache[key] = value
1812                    while len(key_fifo) > size:
1813                        cache.pop(key_fifo.popleft(), None)
1814                    key_fifo.append(key)
1815
1816                def clear(self):
1817                    cache.clear()
1818                    key_fifo.clear()
1819
1820                def cache_len(self):
1821                    return len(cache)
1822
1823                self.get = types.MethodType(get, self)
1824                self.set = types.MethodType(set, self)
1825                self.clear = types.MethodType(clear, self)
1826                self.__len__ = types.MethodType(cache_len, self)
1827
    # argument cache for optimizing repeated calls when backtracking through recursive expressions
    packrat_cache = {}  # replaced by enablePackrat(); defined here so that resetCache() doesn't fail
    # held by _parseCache() for the whole of each cached lookup/parse
    packrat_cache_lock = RLock()
    # [hits, misses] counters updated by _parseCache() and zeroed by resetCache()
    packrat_cache_stats = [0, 0]
1832
1833    # this method gets repeatedly called during backtracking with the same arguments -
1834    # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
1835    def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
1836        HIT, MISS = 0, 1
1837        lookup = (self, instring, loc, callPreParse, doActions)
1838        with ParserElement.packrat_cache_lock:
1839            cache = ParserElement.packrat_cache
1840            value = cache.get(lookup)
1841            if value is cache.not_in_cache:
1842                ParserElement.packrat_cache_stats[MISS] += 1
1843                try:
1844                    value = self._parseNoCache(instring, loc, doActions, callPreParse)
1845                except ParseBaseException as pe:
1846                    # cache a copy of the exception, without the traceback
1847                    cache.set(lookup, pe.__class__(*pe.args))
1848                    raise
1849                else:
1850                    cache.set(lookup, (value[0], value[1].copy()))
1851                    return value
1852            else:
1853                ParserElement.packrat_cache_stats[HIT] += 1
1854                if isinstance(value, Exception):
1855                    raise value
1856                return value[0], value[1].copy()
1857
    # default parse entry point; enablePackrat() rebinds this to _parseCache
    _parse = _parseNoCache
1859
1860    @staticmethod
1861    def resetCache():
1862        ParserElement.packrat_cache.clear()
1863        ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)
1864
1865    _packratEnabled = False
1866    @staticmethod
1867    def enablePackrat(cache_size_limit=128):
1868        """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1869           Repeated parse attempts at the same string location (which happens
1870           often in many complex grammars) can immediately return a cached value,
1871           instead of re-executing parsing/validating code.  Memoizing is done of
1872           both valid results and parsing exceptions.
1873
1874           Parameters:
1875
1876           - cache_size_limit - (default= ``128``) - if an integer value is provided
1877             will limit the size of the packrat cache; if None is passed, then
1878             the cache size will be unbounded; if 0 is passed, the cache will
1879             be effectively disabled.
1880
1881           This speedup may break existing programs that use parse actions that
1882           have side-effects.  For this reason, packrat parsing is disabled when
1883           you first import pyparsing.  To activate the packrat feature, your
1884           program must call the class method :class:`ParserElement.enablePackrat`.
1885           For best results, call ``enablePackrat()`` immediately after
1886           importing pyparsing.
1887
1888           Example::
1889
1890               import pyparsing
1891               pyparsing.ParserElement.enablePackrat()
1892        """
1893        if not ParserElement._packratEnabled:
1894            ParserElement._packratEnabled = True
1895            if cache_size_limit is None:
1896                ParserElement.packrat_cache = ParserElement._UnboundedCache()
1897            else:
1898                ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
1899            ParserElement._parse = ParserElement._parseCache
1900
1901    def parseString(self, instring, parseAll=False):
1902        """
1903        Execute the parse expression with the given string.
1904        This is the main interface to the client code, once the complete
1905        expression has been built.
1906
1907        Returns the parsed data as a :class:`ParseResults` object, which may be
1908        accessed as a list, or as a dict or object with attributes if the given parser
1909        includes results names.
1910
1911        If you want the grammar to require that the entire input string be
1912        successfully parsed, then set ``parseAll`` to True (equivalent to ending
1913        the grammar with ``StringEnd()``).
1914
1915        Note: ``parseString`` implicitly calls ``expandtabs()`` on the input string,
1916        in order to report proper column numbers in parse actions.
1917        If the input string contains tabs and
1918        the grammar uses parse actions that use the ``loc`` argument to index into the
1919        string being parsed, you can ensure you have a consistent view of the input
1920        string by:
1921
1922        - calling ``parseWithTabs`` on your grammar before calling ``parseString``
1923          (see :class:`parseWithTabs`)
1924        - define your parse action using the full ``(s, loc, toks)`` signature, and
1925          reference the input string using the parse action's ``s`` argument
1926        - explictly expand the tabs in your input string before calling
1927          ``parseString``
1928
1929        Example::
1930
1931            Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
1932            Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
1933        """
1934        ParserElement.resetCache()
1935        if not self.streamlined:
1936            self.streamline()
1937            # ~ self.saveAsList = True
1938        for e in self.ignoreExprs:
1939            e.streamline()
1940        if not self.keepTabs:
1941            instring = instring.expandtabs()
1942        try:
1943            loc, tokens = self._parse(instring, 0)
1944            if parseAll:
1945                loc = self.preParse(instring, loc)
1946                se = Empty() + StringEnd()
1947                se._parse(instring, loc)
1948        except ParseBaseException as exc:
1949            if ParserElement.verbose_stacktrace:
1950                raise
1951            else:
1952                # catch and re-raise exception from here, clearing out pyparsing internal stack trace
1953                if getattr(exc, '__traceback__', None) is not None:
1954                    exc.__traceback__ = self._trim_traceback(exc.__traceback__)
1955                raise exc
1956        else:
1957            return tokens
1958
    def scanString(self, instring, maxMatches=_MAX_INT, overlap=False):
        """
        Scan the input string for expression matches.  This is a generator; each match
        yields a tuple of the matching tokens, start location, and end location.
        May be called with optional
        ``maxMatches`` argument, to clip scanning after 'n' matches are found.  If
        ``overlap`` is specified, then overlapping matches will be reported.

        Matches that do not advance past the current scan location are not reported.

        Note that the start and end locations are reported relative to the string
        being parsed.  See :class:`parseString` for more information on parsing
        strings with embedded tabs.

        Example::

            source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
            print(source)
            for tokens, start, end in Word(alphas).scanString(source):
                print(' '*start + '^'*(end-start))
                print(' '*start + tokens[0])

        prints::

            sldjf123lsdjjkf345sldkjf879lkjsfd987
            ^^^^^
            sldjf
                    ^^^^^^^
                    lsdjjkf
                              ^^^^^^
                              sldkjf
                                       ^^^^^^
                                       lkjsfd
        """
        if not self.streamlined:
            self.streamline()
        for e in self.ignoreExprs:
            e.streamline()

        if not self.keepTabs:
            instring = _ustr(instring).expandtabs()
        instrlen = len(instring)
        loc = 0
        # bind the bound methods to locals once, ahead of the scan loop
        preparseFn = self.preParse
        parseFn = self._parse
        ParserElement.resetCache()
        matches = 0
        try:
            while loc <= instrlen and matches < maxMatches:
                try:
                    # preParse is done here explicitly, so the parse call is told to skip it
                    preloc = preparseFn(instring, loc)
                    nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
                except ParseException:
                    # no match at this position: retry one character further on
                    loc = preloc + 1
                else:
                    if nextLoc > loc:
                        matches += 1
                        yield tokens, preloc, nextLoc
                        if overlap:
                            # NOTE(review): 'nextloc' (lowercase, the pre-parsed location) and
                            # 'nextLoc' (end of the match) are different variables; when skippable
                            # text precedes the match this jumps to the END of the match - confirm
                            # that this is the intended overlap behaviour
                            nextloc = preparseFn(instring, loc)
                            if nextloc > loc:
                                loc = nextLoc
                            else:
                                loc += 1
                        else:
                            loc = nextLoc
                    else:
                        # match did not advance past loc: step forward so the scan cannot stall
                        loc = preloc + 1
        except ParseBaseException as exc:
            if ParserElement.verbose_stacktrace:
                raise
            else:
                # catch and re-raise exception from here, clearing out pyparsing internal stack trace
                if getattr(exc, '__traceback__', None) is not None:
                    exc.__traceback__ = self._trim_traceback(exc.__traceback__)
                raise exc
2032
2033    def transformString(self, instring):
2034        """
2035        Extension to :class:`scanString`, to modify matching text with modified tokens that may
2036        be returned from a parse action.  To use ``transformString``, define a grammar and
2037        attach a parse action to it that modifies the returned token list.
2038        Invoking ``transformString()`` on a target string will then scan for matches,
2039        and replace the matched text patterns according to the logic in the parse
2040        action.  ``transformString()`` returns the resulting transformed string.
2041
2042        Example::
2043
2044            wd = Word(alphas)
2045            wd.setParseAction(lambda toks: toks[0].title())
2046
2047            print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
2048
2049        prints::
2050
2051            Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
2052        """
2053        out = []
2054        lastE = 0
2055        # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
2056        # keep string locs straight between transformString and scanString
2057        self.keepTabs = True
2058        try:
2059            for t, s, e in self.scanString(instring):
2060                out.append(instring[lastE:s])
2061                if t:
2062                    if isinstance(t, ParseResults):
2063                        out += t.asList()
2064                    elif isinstance(t, list):
2065                        out += t
2066                    else:
2067                        out.append(t)
2068                lastE = e
2069            out.append(instring[lastE:])
2070            out = [o for o in out if o]
2071            return "".join(map(_ustr, _flatten(out)))
2072        except ParseBaseException as exc:
2073            if ParserElement.verbose_stacktrace:
2074                raise
2075            else:
2076                # catch and re-raise exception from here, clearing out pyparsing internal stack trace
2077                if getattr(exc, '__traceback__', None) is not None:
2078                    exc.__traceback__ = self._trim_traceback(exc.__traceback__)
2079                raise exc
2080
2081    def searchString(self, instring, maxMatches=_MAX_INT):
2082        """
2083        Another extension to :class:`scanString`, simplifying the access to the tokens found
2084        to match the given parse expression.  May be called with optional
2085        ``maxMatches`` argument, to clip searching after 'n' matches are found.
2086
2087        Example::
2088
2089            # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
2090            cap_word = Word(alphas.upper(), alphas.lower())
2091
2092            print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
2093
2094            # the sum() builtin can be used to merge results into a single ParseResults object
2095            print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
2096
2097        prints::
2098
2099            [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
2100            ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
2101        """
2102        try:
2103            return ParseResults([t for t, s, e in self.scanString(instring, maxMatches)])
2104        except ParseBaseException as exc:
2105            if ParserElement.verbose_stacktrace:
2106                raise
2107            else:
2108                # catch and re-raise exception from here, clearing out pyparsing internal stack trace
2109                if getattr(exc, '__traceback__', None) is not None:
2110                    exc.__traceback__ = self._trim_traceback(exc.__traceback__)
2111                raise exc
2112
2113    def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
2114        """
2115        Generator method to split a string using the given expression as a separator.
2116        May be called with optional ``maxsplit`` argument, to limit the number of splits;
2117        and the optional ``includeSeparators`` argument (default= ``False``), if the separating
2118        matching text should be included in the split results.
2119
2120        Example::
2121
2122            punc = oneOf(list(".,;:/-!?"))
2123            print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
2124
2125        prints::
2126
2127            ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
2128        """
2129        splits = 0
2130        last = 0
2131        for t, s, e in self.scanString(instring, maxMatches=maxsplit):
2132            yield instring[last:s]
2133            if includeSeparators:
2134                yield t[0]
2135            last = e
2136        yield instring[last:]
2137
2138    def __add__(self, other):
2139        """
2140        Implementation of + operator - returns :class:`And`. Adding strings to a ParserElement
2141        converts them to :class:`Literal`s by default.
2142
2143        Example::
2144
2145            greet = Word(alphas) + "," + Word(alphas) + "!"
2146            hello = "Hello, World!"
2147            print (hello, "->", greet.parseString(hello))
2148
2149        prints::
2150
2151            Hello, World! -> ['Hello', ',', 'World', '!']
2152
2153        ``...`` may be used as a parse expression as a short form of :class:`SkipTo`.
2154
2155            Literal('start') + ... + Literal('end')
2156
2157        is equivalent to:
2158
2159            Literal('start') + SkipTo('end')("_skipped*") + Literal('end')
2160
2161        Note that the skipped text is returned with '_skipped' as a results name,
2162        and to support having multiple skips in the same parser, the value returned is
2163        a list of all skipped text.
2164        """
2165        if other is Ellipsis:
2166            return _PendingSkip(self)
2167
2168        if isinstance(other, basestring):
2169            other = self._literalStringClass(other)
2170        if not isinstance(other, ParserElement):
2171            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2172                          SyntaxWarning, stacklevel=2)
2173            return None
2174        return And([self, other])
2175
2176    def __radd__(self, other):
2177        """
2178        Implementation of + operator when left operand is not a :class:`ParserElement`
2179        """
2180        if other is Ellipsis:
2181            return SkipTo(self)("_skipped*") + self
2182
2183        if isinstance(other, basestring):
2184            other = self._literalStringClass(other)
2185        if not isinstance(other, ParserElement):
2186            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2187                          SyntaxWarning, stacklevel=2)
2188            return None
2189        return other + self
2190
2191    def __sub__(self, other):
2192        """
2193        Implementation of - operator, returns :class:`And` with error stop
2194        """
2195        if isinstance(other, basestring):
2196            other = self._literalStringClass(other)
2197        if not isinstance(other, ParserElement):
2198            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2199                          SyntaxWarning, stacklevel=2)
2200            return None
2201        return self + And._ErrorStop() + other
2202
2203    def __rsub__(self, other):
2204        """
2205        Implementation of - operator when left operand is not a :class:`ParserElement`
2206        """
2207        if isinstance(other, basestring):
2208            other = self._literalStringClass(other)
2209        if not isinstance(other, ParserElement):
2210            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2211                          SyntaxWarning, stacklevel=2)
2212            return None
2213        return other - self
2214
2215    def __mul__(self, other):
2216        """
2217        Implementation of * operator, allows use of ``expr * 3`` in place of
2218        ``expr + expr + expr``.  Expressions may also me multiplied by a 2-integer
2219        tuple, similar to ``{min, max}`` multipliers in regular expressions.  Tuples
2220        may also include ``None`` as in:
2221         - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
2222              to ``expr*n + ZeroOrMore(expr)``
2223              (read as "at least n instances of ``expr``")
2224         - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
2225              (read as "0 to n instances of ``expr``")
2226         - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
2227         - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``
2228
2229        Note that ``expr*(None, n)`` does not raise an exception if
2230        more than n exprs exist in the input stream; that is,
2231        ``expr*(None, n)`` does not enforce a maximum number of expr
2232        occurrences.  If this behavior is desired, then write
2233        ``expr*(None, n) + ~expr``
2234        """
2235        if other is Ellipsis:
2236            other = (0, None)
2237        elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
2238            other = ((0, ) + other[1:] + (None,))[:2]
2239
2240        if isinstance(other, int):
2241            minElements, optElements = other, 0
2242        elif isinstance(other, tuple):
2243            other = tuple(o if o is not Ellipsis else None for o in other)
2244            other = (other + (None, None))[:2]
2245            if other[0] is None:
2246                other = (0, other[1])
2247            if isinstance(other[0], int) and other[1] is None:
2248                if other[0] == 0:
2249                    return ZeroOrMore(self)
2250                if other[0] == 1:
2251                    return OneOrMore(self)
2252                else:
2253                    return self * other[0] + ZeroOrMore(self)
2254            elif isinstance(other[0], int) and isinstance(other[1], int):
2255                minElements, optElements = other
2256                optElements -= minElements
2257            else:
2258                raise TypeError("cannot multiply 'ParserElement' and ('%s', '%s') objects", type(other[0]), type(other[1]))
2259        else:
2260            raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
2261
2262        if minElements < 0:
2263            raise ValueError("cannot multiply ParserElement by negative value")
2264        if optElements < 0:
2265            raise ValueError("second tuple value must be greater or equal to first tuple value")
2266        if minElements == optElements == 0:
2267            raise ValueError("cannot multiply ParserElement by 0 or (0, 0)")
2268
2269        if optElements:
2270            def makeOptionalList(n):
2271                if n > 1:
2272                    return Optional(self + makeOptionalList(n - 1))
2273                else:
2274                    return Optional(self)
2275            if minElements:
2276                if minElements == 1:
2277                    ret = self + makeOptionalList(optElements)
2278                else:
2279                    ret = And([self] * minElements) + makeOptionalList(optElements)
2280            else:
2281                ret = makeOptionalList(optElements)
2282        else:
2283            if minElements == 1:
2284                ret = self
2285            else:
2286                ret = And([self] * minElements)
2287        return ret
2288
2289    def __rmul__(self, other):
2290        return self.__mul__(other)
2291
2292    def __or__(self, other):
2293        """
2294        Implementation of | operator - returns :class:`MatchFirst`
2295        """
2296        if other is Ellipsis:
2297            return _PendingSkip(self, must_skip=True)
2298
2299        if isinstance(other, basestring):
2300            other = self._literalStringClass(other)
2301        if not isinstance(other, ParserElement):
2302            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2303                          SyntaxWarning, stacklevel=2)
2304            return None
2305        return MatchFirst([self, other])
2306
2307    def __ror__(self, other):
2308        """
2309        Implementation of | operator when left operand is not a :class:`ParserElement`
2310        """
2311        if isinstance(other, basestring):
2312            other = self._literalStringClass(other)
2313        if not isinstance(other, ParserElement):
2314            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2315                          SyntaxWarning, stacklevel=2)
2316            return None
2317        return other | self
2318
2319    def __xor__(self, other):
2320        """
2321        Implementation of ^ operator - returns :class:`Or`
2322        """
2323        if isinstance(other, basestring):
2324            other = self._literalStringClass(other)
2325        if not isinstance(other, ParserElement):
2326            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2327                          SyntaxWarning, stacklevel=2)
2328            return None
2329        return Or([self, other])
2330
2331    def __rxor__(self, other):
2332        """
2333        Implementation of ^ operator when left operand is not a :class:`ParserElement`
2334        """
2335        if isinstance(other, basestring):
2336            other = self._literalStringClass(other)
2337        if not isinstance(other, ParserElement):
2338            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2339                          SyntaxWarning, stacklevel=2)
2340            return None
2341        return other ^ self
2342
2343    def __and__(self, other):
2344        """
2345        Implementation of & operator - returns :class:`Each`
2346        """
2347        if isinstance(other, basestring):
2348            other = self._literalStringClass(other)
2349        if not isinstance(other, ParserElement):
2350            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2351                          SyntaxWarning, stacklevel=2)
2352            return None
2353        return Each([self, other])
2354
2355    def __rand__(self, other):
2356        """
2357        Implementation of & operator when left operand is not a :class:`ParserElement`
2358        """
2359        if isinstance(other, basestring):
2360            other = self._literalStringClass(other)
2361        if not isinstance(other, ParserElement):
2362            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2363                          SyntaxWarning, stacklevel=2)
2364            return None
2365        return other & self
2366
2367    def __invert__(self):
2368        """
2369        Implementation of ~ operator - returns :class:`NotAny`
2370        """
2371        return NotAny(self)
2372
2373    def __iter__(self):
2374        # must implement __iter__ to override legacy use of sequential access to __getitem__ to
2375        # iterate over a sequence
2376        raise TypeError('%r object is not iterable' % self.__class__.__name__)
2377
2378    def __getitem__(self, key):
2379        """
2380        use ``[]`` indexing notation as a short form for expression repetition:
2381         - ``expr[n]`` is equivalent to ``expr*n``
2382         - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
2383         - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
2384              to ``expr*n + ZeroOrMore(expr)``
2385              (read as "at least n instances of ``expr``")
2386         - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
2387              (read as "0 to n instances of ``expr``")
2388         - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
2389         - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``
2390         ``None`` may be used in place of ``...``.
2391
2392        Note that ``expr[..., n]`` and ``expr[m, n]``do not raise an exception
2393        if more than ``n`` ``expr``s exist in the input stream.  If this behavior is
2394        desired, then write ``expr[..., n] + ~expr``.
2395       """
2396
2397        # convert single arg keys to tuples
2398        try:
2399            if isinstance(key, str):
2400                key = (key,)
2401            iter(key)
2402        except TypeError:
2403            key = (key, key)
2404
2405        if len(key) > 2:
2406            warnings.warn("only 1 or 2 index arguments supported ({0}{1})".format(key[:5],
2407                                                                                '... [{0}]'.format(len(key))
2408                                                                                if len(key) > 5 else ''))
2409
2410        # clip to 2 elements
2411        ret = self * tuple(key[:2])
2412        return ret
2413
2414    def __call__(self, name=None):
2415        """
2416        Shortcut for :class:`setResultsName`, with ``listAllMatches=False``.
2417
2418        If ``name`` is given with a trailing ``'*'`` character, then ``listAllMatches`` will be
2419        passed as ``True``.
2420
2421        If ``name` is omitted, same as calling :class:`copy`.
2422
2423        Example::
2424
2425            # these are equivalent
2426            userdata = Word(alphas).setResultsName("name") + Word(nums + "-").setResultsName("socsecno")
2427            userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
2428        """
2429        if name is not None:
2430            return self._setResultsName(name)
2431        else:
2432            return self.copy()
2433
2434    def suppress(self):
2435        """
2436        Suppresses the output of this :class:`ParserElement`; useful to keep punctuation from
2437        cluttering up returned output.
2438        """
2439        return Suppress(self)
2440
2441    def leaveWhitespace(self):
2442        """
2443        Disables the skipping of whitespace before matching the characters in the
2444        :class:`ParserElement`'s defined pattern.  This is normally only used internally by
2445        the pyparsing module, but may be needed in some whitespace-sensitive grammars.
2446        """
2447        self.skipWhitespace = False
2448        return self
2449
2450    def setWhitespaceChars(self, chars):
2451        """
2452        Overrides the default whitespace chars
2453        """
2454        self.skipWhitespace = True
2455        self.whiteChars = chars
2456        self.copyDefaultWhiteChars = False
2457        return self
2458
2459    def parseWithTabs(self):
2460        """
2461        Overrides default behavior to expand ``<TAB>``s to spaces before parsing the input string.
2462        Must be called before ``parseString`` when the input grammar contains elements that
2463        match ``<TAB>`` characters.
2464        """
2465        self.keepTabs = True
2466        return self
2467
2468    def ignore(self, other):
2469        """
2470        Define expression to be ignored (e.g., comments) while doing pattern
2471        matching; may be called repeatedly, to define multiple comment or other
2472        ignorable patterns.
2473
2474        Example::
2475
2476            patt = OneOrMore(Word(alphas))
2477            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
2478
2479            patt.ignore(cStyleComment)
2480            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
2481        """
2482        if isinstance(other, basestring):
2483            other = Suppress(other)
2484
2485        if isinstance(other, Suppress):
2486            if other not in self.ignoreExprs:
2487                self.ignoreExprs.append(other)
2488        else:
2489            self.ignoreExprs.append(Suppress(other.copy()))
2490        return self
2491
2492    def setDebugActions(self, startAction, successAction, exceptionAction):
2493        """
2494        Enable display of debugging messages while doing pattern matching.
2495        """
2496        self.debugActions = (startAction or _defaultStartDebugAction,
2497                             successAction or _defaultSuccessDebugAction,
2498                             exceptionAction or _defaultExceptionDebugAction)
2499        self.debug = True
2500        return self
2501
2502    def setDebug(self, flag=True):
2503        """
2504        Enable display of debugging messages while doing pattern matching.
2505        Set ``flag`` to True to enable, False to disable.
2506
2507        Example::
2508
2509            wd = Word(alphas).setName("alphaword")
2510            integer = Word(nums).setName("numword")
2511            term = wd | integer
2512
2513            # turn on debugging for wd
2514            wd.setDebug()
2515
2516            OneOrMore(term).parseString("abc 123 xyz 890")
2517
2518        prints::
2519
2520            Match alphaword at loc 0(1,1)
2521            Matched alphaword -> ['abc']
2522            Match alphaword at loc 3(1,4)
2523            Exception raised:Expected alphaword (at char 4), (line:1, col:5)
2524            Match alphaword at loc 7(1,8)
2525            Matched alphaword -> ['xyz']
2526            Match alphaword at loc 11(1,12)
2527            Exception raised:Expected alphaword (at char 12), (line:1, col:13)
2528            Match alphaword at loc 15(1,16)
2529            Exception raised:Expected alphaword (at char 15), (line:1, col:16)
2530
2531        The output shown is that produced by the default debug actions - custom debug actions can be
2532        specified using :class:`setDebugActions`. Prior to attempting
2533        to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
2534        is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
2535        message is shown. Also note the use of :class:`setName` to assign a human-readable name to the expression,
2536        which makes debugging and exception messages easier to understand - for instance, the default
2537        name created for the :class:`Word` expression without calling ``setName`` is ``"W:(ABCD...)"``.
2538        """
2539        if flag:
2540            self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction)
2541        else:
2542            self.debug = False
2543        return self
2544
2545    def __str__(self):
2546        return self.name
2547
2548    def __repr__(self):
2549        return _ustr(self)
2550
2551    def streamline(self):
2552        self.streamlined = True
2553        self.strRepr = None
2554        return self
2555
2556    def checkRecursion(self, parseElementList):
2557        pass
2558
2559    def validate(self, validateTrace=None):
2560        """
2561        Check defined expressions for valid structure, check for infinite recursive definitions.
2562        """
2563        self.checkRecursion([])
2564
2565    def parseFile(self, file_or_filename, parseAll=False):
2566        """
2567        Execute the parse expression on the given file or filename.
2568        If a filename is specified (instead of a file object),
2569        the entire file is opened, read, and closed before parsing.
2570        """
2571        try:
2572            file_contents = file_or_filename.read()
2573        except AttributeError:
2574            with open(file_or_filename, "r") as f:
2575                file_contents = f.read()
2576        try:
2577            return self.parseString(file_contents, parseAll)
2578        except ParseBaseException as exc:
2579            if ParserElement.verbose_stacktrace:
2580                raise
2581            else:
2582                # catch and re-raise exception from here, clearing out pyparsing internal stack trace
2583                if getattr(exc, '__traceback__', None) is not None:
2584                    exc.__traceback__ = self._trim_traceback(exc.__traceback__)
2585                raise exc
2586
2587    def __eq__(self, other):
2588        if self is other:
2589            return True
2590        elif isinstance(other, basestring):
2591            return self.matches(other)
2592        elif isinstance(other, ParserElement):
2593            return vars(self) == vars(other)
2594        return False
2595
2596    def __ne__(self, other):
2597        return not (self == other)
2598
2599    def __hash__(self):
2600        return id(self)
2601
2602    def __req__(self, other):
2603        return self == other
2604
2605    def __rne__(self, other):
2606        return not (self == other)
2607
2608    def matches(self, testString, parseAll=True):
2609        """
2610        Method for quick testing of a parser against a test string. Good for simple
2611        inline microtests of sub expressions while building up larger parser.
2612
2613        Parameters:
2614         - testString - to test against this expression for a match
2615         - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests
2616
2617        Example::
2618
2619            expr = Word(nums)
2620            assert expr.matches("100")
2621        """
2622        try:
2623            self.parseString(_ustr(testString), parseAll=parseAll)
2624            return True
2625        except ParseBaseException:
2626            return False
2627
    def runTests(self, tests, parseAll=True, comment='#',
                 fullDump=True, printResults=True, failureTests=False, postParse=None,
                 file=None):
        """
        Execute the parse expression on a series of test strings, showing each
        test, the parsed results or where the parse failed. Quick and easy way to
        run a parse expression against a list of sample strings.

        Parameters:
         - tests - a list of separate test strings, or a multiline string of test strings
         - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests
         - comment - (default= ``'#'``) - expression for indicating embedded comments in the test
              string; pass None to disable comment filtering
         - fullDump - (default= ``True``) - dump results as list followed by results names in nested outline;
              if False, only dump nested list
         - printResults - (default= ``True``) prints test output to stdout
         - failureTests - (default= ``False``) indicates if these tests are expected to fail parsing
         - postParse - (default= ``None``) optional callback for successful parse results; called as
              `fn(test_string, parse_results)` and returns a string to be added to the test output
         - file - (default=``None``) optional file-like object to which test output will be written;
              if None, will default to ``sys.stdout``

        Returns: a (success, results) tuple, where success indicates that all tests succeeded
        (or failed if ``failureTests`` is True), and the results contain a list of
        (test string, parse results or raised exception) tuples, one per test

        Example::

            number_expr = pyparsing_common.number.copy()

            result = number_expr.runTests('''
                # unsigned integer
                100
                # negative integer
                -100
                # float with scientific notation
                6.02e23
                # integer with scientific notation
                1e-12
                ''')
            print("Success" if result[0] else "Failed!")

            result = number_expr.runTests('''
                # stray character
                100Z
                # missing leading digit before '.'
                -.100
                # too many '.'
                3.14.159
                ''', failureTests=True)
            print("Success" if result[0] else "Failed!")

        prints::

            # unsigned integer
            100
            [100]

            # negative integer
            -100
            [-100]

            # float with scientific notation
            6.02e23
            [6.02e+23]

            # integer with scientific notation
            1e-12
            [1e-12]

            Success

            # stray character
            100Z
               ^
            FAIL: Expected end of text (at char 3), (line:1, col:4)

            # missing leading digit before '.'
            -.100
            ^
            FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

            # too many '.'
            3.14.159
                ^
            FAIL: Expected end of text (at char 4), (line:1, col:5)

            Success

        Each test string must be on a single line. If you want to test a string that spans multiple
        lines, create a test like this::

            expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

        (Note that this is a raw string literal, you must include the leading 'r'.)
        """
        # a multiline string is split into individual, whitespace-stripped test lines
        if isinstance(tests, basestring):
            tests = list(map(str.strip, tests.rstrip().splitlines()))
        # a string comment marker is turned into a Literal expression
        if isinstance(comment, basestring):
            comment = Literal(comment)
        if file is None:
            file = sys.stdout
        print_ = file.write

        allResults = []
        # comment lines collected since the last test; emitted ahead of the next test
        comments = []
        success = True
        # matches the two-character sequence backslash + 'n' (outside of quoted strings)
        # and replaces it with a real newline
        NL = Literal(r'\n').addParseAction(replaceWith('\n')).ignore(quotedString)
        BOM = u'\ufeff'
        for t in tests:
            # comment lines, and blank lines that follow collected comments, are
            # accumulated rather than run as tests
            if comment is not None and comment.matches(t, False) or comments and not t:
                comments.append(t)
                continue
            # remaining blank lines are skipped
            if not t:
                continue
            # output starts with the pending comments (if any) followed by the test string
            out = ['\n' + '\n'.join(comments) if comments else '', t]
            comments = []
            try:
                # convert newline marks to actual newlines, and strip leading BOM if present
                t = NL.transformString(t.lstrip(BOM))
                result = self.parseString(t, parseAll=parseAll)
            except ParseBaseException as pe:
                fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
                # show a '^' marker under the failure location; for multi-line tests,
                # first show the line containing the failure
                if '\n' in t:
                    out.append(line(pe.loc, t))
                    out.append(' ' * (col(pe.loc, t) - 1) + '^' + fatal)
                else:
                    out.append(' ' * pe.loc + '^' + fatal)
                out.append("FAIL: " + str(pe))
                # a failure keeps the run successful only when failures are expected
                success = success and failureTests
                result = pe
            except Exception as exc:
                # any other exception is reported and counted the same way as a parse failure
                out.append("FAIL-EXCEPTION: " + str(exc))
                success = success and failureTests
                result = exc
            else:
                # a successful parse keeps the run successful only when failures are not expected
                success = success and not failureTests
                if postParse is not None:
                    try:
                        pp_value = postParse(t, result)
                        if pp_value is not None:
                            if isinstance(pp_value, ParseResults):
                                out.append(pp_value.dump())
                            else:
                                out.append(str(pp_value))
                        else:
                            # callback returned nothing: fall back to dumping the parse results
                            out.append(result.dump())
                    except Exception as e:
                        # a failing callback does not fail the test; the results are dumped
                        # and the callback error is reported
                        out.append(result.dump(full=fullDump))
                        out.append("{0} failed: {1}: {2}".format(postParse.__name__, type(e).__name__, e))
                else:
                    out.append(result.dump(full=fullDump))

            if printResults:
                if fullDump:
                    out.append('')
                print_('\n'.join(out))

            # record the (newline-expanded) test string with its results or exception
            allResults.append((t, result))

        return success, allResults
2789
2790
class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element;
    # once another ParserElement is added, this placeholder will be replaced with a SkipTo
    def __init__(self, expr, must_skip=False):
        super(_PendingSkip, self).__init__()
        # display the placeholder as the anchor expression followed by '...'
        placeholder_repr = str(expr + Empty()).replace('Empty', '...')
        self.strRepr = placeholder_repr
        self.name = placeholder_repr
        self.anchor = expr
        self.must_skip = must_skip

    def __add__(self, other):
        # the skipped text is collected under the '_skipped' results name
        skipper = SkipTo(other).setName("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # nothing (or only an empty string) was skipped: drop the skipped
            # token and its results name
            if not t._skipped or t._skipped.asList() == ['']:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # the alternative without the anchor matched with an empty skip:
            # replace the skipped entry with a note that the anchor is missing
            if t._skipped.asList()[-1:] == ['']:
                t.pop('_skipped')
                t['_skipped'] = 'missing <' + repr(self.anchor) + '>'

        return (self.anchor + skipper().addParseAction(must_skip)
                | skipper().addParseAction(show_skip)) + other

    def __repr__(self):
        return self.strRepr

    def parseImpl(self, *args):
        # a placeholder that was never combined with a following expression cannot parse
        raise Exception("use of `...` expression without following SkipTo target expression")
2822
2823
class Token(ParserElement):
    """Abstract :class:`ParserElement` subclass, for defining atomic
    matching patterns.
    """
    def __init__(self):
        # tokens are initialized with savelist turned off
        super(Token, self).__init__(savelist=False)
2830
2831
class Empty(Token):
    """An empty token, will always match.
    """
    def __init__(self):
        super(Empty, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
2840
2841
class NoMatch(Token):
    """A token that will never match.
    """
    def __init__(self):
        super(NoMatch, self).__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "NoMatch"

    def parseImpl(self, instring, loc, doActions=True):
        # unconditionally fail at the current location
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
2854
2855
class Literal(Token):
    """Token to exactly match a specified string.

    Example::

        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use :class:`CaselessLiteral`.

    For keyword matching (force word break before and after the matched string),
    use :class:`Keyword` or :class:`CaselessKeyword`.

    Passing an empty string issues a ``SyntaxWarning`` and turns the instance
    into an :class:`Empty`.
    """
    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            # morph this instance into an Empty token
            self.__class__ = Empty
            is_empty = True
        else:
            is_empty = False
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        # an instance morphed into Empty always matches, so it must report
        # mayReturnEmpty just as a directly constructed Empty() does
        self.mayReturnEmpty = is_empty
        self.mayIndexError = False

        # Performance tuning: modify __class__ to select
        # a parseImpl optimized for single-character check
        if self.matchLen == 1 and type(self) is Literal:
            self.__class__ = _SingleCharLiteral

    def parseImpl(self, instring, loc, doActions=True):
        # cheap first-character test before the full startswith comparison
        if instring[loc] == self.firstMatchChar and instring.startswith(self.match, loc):
            return loc + self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)
2894
class _SingleCharLiteral(Literal):
    # Literal variant selected for one-character match strings: parseImpl needs
    # only a single character comparison
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match
2900
# short module-private alias for Literal
_L = Literal
# class that ParserElement operators (e.g. |, ^, &) use to convert plain string
# operands into parser elements
ParserElement._literalStringClass = Literal
2903
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is,
    it must not be immediately preceded or followed by a keyword
    (identifier) character.  Compare with :class:`Literal`:

     - ``Literal("if")`` will match the leading ``'if'`` in
       ``'ifAndOnlyIf'``.
     - ``Keyword("if")`` will not; it will only match the leading
       ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

     - ``identChars`` is a string of characters that would be valid
       identifier characters, defaulting to all alphanumerics + "_" and
       "$"
     - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """
    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(self, matchString, identChars=None, caseless=False):
        super(Keyword, self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are done in upper case on both sides
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        # a match requires the keyword text at loc, with no identifier character
        # immediately after it (unless at end of input) or immediately before it
        # (unless at start of input)
        if self.caseless:
            if ((instring[loc:loc + self.matchLen].upper() == self.caselessmatch)
                    and (loc >= len(instring) - self.matchLen
                         or instring[loc + self.matchLen].upper() not in self.identChars)
                    and (loc == 0
                         or instring[loc - 1].upper() not in self.identChars)):
                return loc + self.matchLen, self.match

        else:
            if instring[loc] == self.firstMatchChar:
                if ((self.matchLen == 1 or instring.startswith(self.match, loc))
                        and (loc >= len(instring) - self.matchLen
                             or instring[loc + self.matchLen] not in self.identChars)
                        and (loc == 0 or instring[loc - 1] not in self.identChars)):
                    return loc + self.matchLen, self.match

        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """Return a copy of this keyword that keeps its identifier characters."""
        c = super(Keyword, self).copy()
        # Give the copy its own set built from this keyword's identChars (already
        # upper-cased for caseless keywords).  Resetting to DEFAULT_KEYWORD_CHARS
        # here would silently drop custom identChars from every copy.
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
2981
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::

        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """
    def __init__(self, matchString):
        # the base class stores the upper-cased form, used for comparisons
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        candidate = instring[loc:end]
        if candidate.upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        # report the literal as originally given, not as found in the input
        return end, self.returnString
3004
class CaselessKeyword(Keyword):
    """
    Caseless version of :class:`Keyword`.

    Example::

        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """
    def __init__(self, matchString, identChars=None):
        # identical to Keyword, with caseless matching always switched on
        super(CaselessKeyword, self).__init__(matchString, identChars=identChars, caseless=True)
3017
class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

     - ``match_string`` - string to be matched
     - ``maxMismatches`` - (``default=1``) maximum number of
       mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

     - ``mismatches`` - a list of the positions within the
       match_string where mismatches were found
     - ``original`` - the original match_string used to compare
       against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """
    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch, self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the input at ``loc`` with ``match_string`` position by
        position.

        Returns ``(end_loc, results)`` when no more than ``maxMismatches``
        positions differ; ``results`` holds the matched input text plus the
        named results ``original`` and ``mismatches``.  Raises
        ``ParseException`` (at the starting location) when the input is too
        short or too many positions differ.
        """
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        # Only attempt a comparison when enough input remains.
        if maxloc <= len(instring):
            maxMismatches = self.maxMismatches
            mismatches = []

            for offset, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
                if src != mat:
                    mismatches.append(offset)
                    if len(mismatches) > maxMismatches:
                        # too many differences - fall through to the raise below
                        break
            else:
                # The whole match_string was compared, so the match ends at
                # maxloc.  The end must be measured from ``start``; using the
                # offset within match_string alone is only right when the
                # match begins at position 0 of the input.
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
3085
3086
class Word(Token):
    """Token that matches a run of characters drawn from allowed sets.
    A ``Word`` is defined with a string of the characters permitted in
    the first position, an optional string of characters permitted in
    the remaining positions (if omitted, the initial character set is
    used for the body as well), and optional minimum, maximum, and/or
    exact lengths.  ``min`` defaults to 1 (a minimum value < 1 is not
    valid); ``max`` and ``exact`` default to 0, meaning no maximum or
    exact length restriction.  Characters listed in the optional
    ``excludeChars`` argument are removed from both character sets;
    useful to define a word of all printables except for one or two
    characters, for instance.

    :class:`srange` is useful for defining custom character set strings
    for defining ``Word`` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

     - :class:`alphas`
     - :class:`nums`
     - :class:`alphanums`
     - :class:`hexnums`
     - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
       - accented, tilded, umlauted, etc.)
     - :class:`punc8bit` (non-alphabetic characters in ASCII range
       128-255 - currency, symbols, superscripts, diacriticals, etc.)
     - :class:`printables` (any non-whitespace character)

    Example::

        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, excludeChars=",")
    """
    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
        super(Word, self).__init__()
        if excludeChars:
            # strip the excluded characters out of both character strings
            banned = set(excludeChars)
            initChars = ''.join([ch for ch in initChars if ch not in banned])
            if bodyChars:
                bodyChars = ''.join([ch for ch in bodyChars if ch not in banned])

        # an omitted (or empty) body string means "same as the initial set"
        body_source = bodyChars if bodyChars else initChars
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        self.bodyCharsOrig = body_source
        self.bodyChars = set(body_source)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        if exact > 0:
            # an exact length pins both bounds
            self.minLen = self.maxLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # With default length limits and no space in either character
        # string, try to build an equivalent regular expression.
        default_lengths = (min == 1 and max == 0 and exact == 0)
        if default_lengths and ' ' not in self.initCharsOrig + self.bodyCharsOrig:
            init_src = self.initCharsOrig
            body_src = self.bodyCharsOrig
            if body_src == init_src:
                pattern = "[%s]+" % _escapeRegexRangeChars(init_src)
            else:
                if len(init_src) == 1:
                    lead = re.escape(init_src)
                else:
                    lead = "[%s]" % _escapeRegexRangeChars(init_src)
                pattern = "%s[%s]*" % (lead, _escapeRegexRangeChars(body_src))
            if self.asKeyword:
                pattern = r"\b" + pattern + r"\b"
            self.reString = pattern

            try:
                compiled = re.compile(pattern)
            except Exception:
                # pattern could not be compiled - stay with the
                # character-by-character parseImpl of this class
                self.re = None
            else:
                self.re = compiled
                self.re_match = compiled.match
                # switch this instance over to the regex-based parseImpl
                self.__class__ = _WordRegex

    def parseImpl(self, instring, loc, doActions=True):
        """Scan a word starting at ``loc``.

        Returns ``(end_loc, matched_text)``.  Raises ``ParseException`` if
        the first character is not an initial character, if fewer than
        ``minLen`` characters matched, if a maximum was given and another
        body character follows the match, or if ``asKeyword`` is set and a
        body character is adjacent to the match on either side.
        """
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        instrlen = len(instring)
        bodychars = self.bodyChars
        limit = min(start + self.maxLen, instrlen)
        loc += 1
        while loc < limit and instring[loc] in bodychars:
            loc += 1

        too_short = loc - start < self.minLen
        body_follows = loc < instrlen and instring[loc] in bodychars
        body_precedes = start > 0 and instring[start - 1] in bodychars

        if (too_short
                or (self.maxSpecified and body_follows)
                or (self.asKeyword and (body_precedes or body_follows))):
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        try:
            return super(Word, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:

            def abbreviate(chars):
                # show at most the first four characters
                return chars[:4] + "..." if len(chars) > 4 else chars

            if self.initCharsOrig == self.bodyCharsOrig:
                self.strRepr = "W:(%s)" % abbreviate(self.initCharsOrig)
            else:
                self.strRepr = "W:(%s, %s)" % (abbreviate(self.initCharsOrig), abbreviate(self.bodyCharsOrig))

        return self.strRepr
3245
class _WordRegex(Word):
    # Variant of Word whose parseImpl delegates to the compiled regular
    # expression's match function stored on the instance as ``re_match``.
    def parseImpl(self, instring, loc, doActions=True):
        found = self.re_match(instring, loc)
        if found:
            return found.end(), found.group()
        raise ParseException(instring, loc, self.errmsg, self)
3254
3255
class Char(_WordRegex):
    """Shorthand for ``Word(characters, exact=1)``: matches exactly one
    character taken from the given string of characters.
    """
    def __init__(self, charset, asKeyword=False, excludeChars=None):
        super(Char, self).__init__(charset, exact=1, asKeyword=asKeyword, excludeChars=excludeChars)
        # Build the single-character regex used by the inherited parseImpl.
        char_class = "[%s]" % _escapeRegexRangeChars(''.join(self.initChars))
        if asKeyword:
            char_class = r"\b%s\b" % char_class
        self.reString = char_class
        self.re = re.compile(self.reString)
        self.re_match = self.re.match
3268
3269
class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python  `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named parse results.

    If instead of the Python stdlib re module you wish to use a different RE module
    (such as the `regex` module), build your Regex object from a pattern
    that has already been compiled using that module.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # use regex module instead of stdlib re module to construct a Regex using
        # a compiled regular expression
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))

    """
    def __init__(self, pattern, flags=0, asGroupList=False, asMatch=False):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.

        ``pattern`` may also be an already-compiled pattern object (anything
        with ``pattern`` and ``match`` attributes); in that case it is used
        directly and ``flags`` is only recorded, not applied.

        If ``asGroupList`` is true, a successful parse returns the tuple of
        the match's groups; if ``asMatch`` is true, it returns the match
        object itself (``asMatch`` takes precedence when both are set).

        Raises ``re.error`` if a pattern string cannot be compiled, and
        ``TypeError`` if ``pattern`` is neither a string nor a compiled RE.
        """
        super(Regex, self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                              SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                # re.error is the exception re.compile() raises for a bad
                # pattern; catching it by that name avoids relying on the
                # deprecated sre_constants module.
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                              SyntaxWarning, stacklevel=2)
                raise

        elif hasattr(pattern, 'pattern') and hasattr(pattern, 'match'):
            # duck-typed compiled pattern (stdlib re or a compatible module)
            self.re = pattern
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise TypeError("Regex may only be constructed with a string or a compiled RE object")

        self.re_match = self.re.match

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        # the expression can match empty input iff the regex matches ""
        self.mayReturnEmpty = self.re_match("") is not None
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # select the result style by rebinding parseImpl on this instance
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch

    def parseImpl(self, instring, loc, doActions=True):
        """Match the regex at ``loc``; return ``(end_loc, ParseResults)``
        holding the matched text, with any named groups added as named
        results.  Raises ``ParseException`` if the regex does not match.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        for k, v in result.groupdict().items():
            ret[k] = v
        return loc, ret

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        """Match the regex at ``loc``; return ``(end_loc, groups_tuple)``.
        Raises ``ParseException`` if the regex does not match.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.groups()
        return loc, ret

    def parseImplAsMatch(self, instring, loc, doActions=True):
        """Match the regex at ``loc``; return ``(end_loc, match_object)``.
        Raises ``ParseException`` if the regex does not match.
        """
        result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result
        return loc, ret

    def __str__(self):
        try:
            return super(Regex, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr

    def sub(self, repl):
        r"""
        Return Regex with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``SyntaxError`` (after issuing a ``SyntaxWarning``) if this
        Regex was built with ``asGroupList=True``, or with ``asMatch=True``
        when ``repl`` is a callable.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transformString("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            warnings.warn("cannot use sub() with Regex(asGroupList=True)",
                          SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if self.asMatch and callable(repl):
            warnings.warn("cannot use sub() with a callable with Regex(asMatch=True)",
                          SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if self.asMatch:
            # the token is a match object: expand the template against it
            def pa(tokens):
                return tokens[0].expand(repl)
        else:
            # the token is the matched text: re-run the substitution on it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])
        return self.addParseAction(pa)
3409
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:

        - quoteChar - string of one or more characters defining the
          quote delimiting string
        - escChar - character to escape quotes, typically backslash
          (default= ``None``)
        - escQuote - special quote sequence to escape an embedded quote
          string (such as SQL's ``""`` to escape an embedded ``"``)
          (default= ``None``)
        - multiline - boolean indicating whether quotes can span
          multiple lines (default= ``False``)
        - unquoteResults - boolean indicating whether the matched text
          should be unquoted (default= ``True``)
        - endQuoteChar - string of one or more characters defining the
          end of the quote delimited string (default= ``None``  => same as
          quoteChar)
        - convertWhitespaceEscapes - convert escaped whitespace
          (``'\t'``, ``'\n'``, etc.) to actual whitespace
          (default= ``True``)

    Example::

        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))

    prints::

        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False,
                 unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        super(QuotedString, self).__init__()

        # surrounding whitespace on the delimiters is discarded
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            # closing delimiter defaults to the opening one
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # The pattern is: opening quote, then any number of alternatives
        # (ordinary character / partial end quote / escaped quote / escaped
        # character), then the closing quote.
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            line_breaks = ''
        else:
            self.flags = 0
            # single-line strings may not contain a raw newline or return
            line_breaks = r'\n\r'

        open_quote = re.escape(self.quoteChar)
        end_first = _escapeRegexRangeChars(self.endQuoteChar[0])
        if escChar is not None:
            esc_in_class = _escapeRegexRangeChars(escChar) or ''
        else:
            esc_in_class = ''
        pieces = [r'%s(?:[^%s%s%s]' % (open_quote, end_first, line_breaks, esc_in_class)]

        # For a multi-character end quote, allow each proper prefix of it as
        # long as it is not followed by the next end-quote character
        # (longest prefix first).
        for i in range(len(self.endQuoteChar) - 1, 0, -1):
            pieces.append('|(?:%s[^%s])' % (re.escape(self.endQuoteChar[:i]),
                                            _escapeRegexRangeChars(self.endQuoteChar[i])))

        if escQuote:
            pieces.append(r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            pieces.append(r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar) + "(.)"
        pieces.append(r')*%s' % re.escape(self.endQuoteChar))
        self.pattern = ''.join(pieces)

        try:
            compiled = re.compile(self.pattern, self.flags)
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                          SyntaxWarning, stacklevel=2)
            raise
        self.re = compiled
        self.reString = self.pattern
        self.re_match = compiled.match

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at ``loc`` and return ``(end_loc, text)``.

        With ``unquoteResults`` the delimiters are removed and escape
        sequences are processed; otherwise the raw matched text is
        returned.  Raises ``ParseException`` if there is no match.
        """
        # cheap first-character test before running the regex
        result = None
        if instring[loc] == self.firstQuoteChar:
            result = self.re_match(instring, loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        end = result.end()
        text = result.group()

        if not self.unquoteResults:
            return end, text

        # strip off quotes
        text = text[self.quoteCharLen: -self.endQuoteCharLen]

        if isinstance(text, basestring):
            # replace escaped whitespace
            if '\\' in text and self.convertWhitespaceEscapes:
                for literal, actual in ((r'\t', '\t'),
                                        (r'\n', '\n'),
                                        (r'\f', '\f'),
                                        (r'\r', '\r')):
                    text = text.replace(literal, actual)

            # replace escaped characters
            if self.escChar:
                text = re.sub(self.escCharReplacePattern, r"\g<1>", text)

            # replace escaped quotes
            if self.escQuote:
                text = text.replace(self.escQuote, self.endQuoteChar)

        return end, text

    def __str__(self):
        try:
            return super(QuotedString, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            delimiters = (self.quoteChar, self.endQuoteChar)
            self.strRepr = "quoted string, starting with %s ending with %s" % delimiters

        return self.strRepr
3559
3560
class CharsNotIn(Token):
    """Token that matches a run of characters *not* contained in a given
    set (whitespace is included in the match unless it is listed in the
    exclusion set - see example). Defined with a string of all the
    disallowed characters, and an optional minimum, maximum, and/or
    exact length.  ``min`` defaults to 1 (a minimum value < 1 is not
    valid); ``max`` and ``exact`` default to 0, meaning no maximum or
    exact length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__(self, notChars, min=1, max=0, exact=0):
        super(CharsNotIn, self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use "
                             "Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        if exact > 0:
            # an exact length pins both bounds
            self.minLen = self.maxLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Consume characters from ``loc`` until a disallowed character, the
        maximum length, or the end of input is reached.

        Returns ``(end_loc, matched_text)``; raises ``ParseException`` if
        the first character is disallowed or fewer than ``minLen``
        characters were consumed.
        """
        notchars = self.notChars
        if instring[loc] in notchars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        stop = min(start + self.maxLen, len(instring))
        loc += 1
        while loc < stop and instring[loc] not in notchars:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            excluded = self.notChars
            # show at most the first four excluded characters
            if len(excluded) > 4:
                self.strRepr = "!W:(%s...)" % excluded[:4]
            else:
                self.strRepr = "!W:(%s)" % excluded

        return self.strRepr
3635
class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """
    # display names used to build the expression name from the matched chars
    whiteStrs = {
        ' ' : '<SP>',
        '\t': '<TAB>',
        '\n': '<LF>',
        '\r': '<CR>',
        '\f': '<FF>',
        u'\u00A0': '<NBSP>',
        u'\u1680': '<OGHAM_SPACE_MARK>',
        u'\u180E': '<MONGOLIAN_VOWEL_SEPARATOR>',
        u'\u2000': '<EN_QUAD>',
        u'\u2001': '<EM_QUAD>',
        u'\u2002': '<EN_SPACE>',
        u'\u2003': '<EM_SPACE>',
        u'\u2004': '<THREE-PER-EM_SPACE>',
        u'\u2005': '<FOUR-PER-EM_SPACE>',
        u'\u2006': '<SIX-PER-EM_SPACE>',
        u'\u2007': '<FIGURE_SPACE>',
        u'\u2008': '<PUNCTUATION_SPACE>',
        u'\u2009': '<THIN_SPACE>',
        u'\u200A': '<HAIR_SPACE>',
        u'\u200B': '<ZERO_WIDTH_SPACE>',
        u'\u202F': '<NNBSP>',
        u'\u205F': '<MMSP>',
        u'\u3000': '<IDEOGRAPHIC_SPACE>',
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White, self).__init__()
        self.matchWhite = ws
        # the characters being matched must not be skipped as leading whitespace
        still_skipped = [c for c in self.whiteChars if c not in self.matchWhite]
        self.setWhitespaceChars("".join(still_skipped))
        self.name = "".join([White.whiteStrs[c] for c in self.matchWhite])
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        if exact > 0:
            # an exact length pins both bounds
            self.minLen = self.maxLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of the configured whitespace characters at ``loc``.

        Returns ``(end_loc, matched_text)``; raises ``ParseException`` if
        the character at ``loc`` is not one of them or the run is shorter
        than ``minLen``.
        """
        wanted = self.matchWhite
        if instring[loc] not in wanted:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        stop = min(start + self.maxLen, len(instring))
        loc += 1
        while loc < stop and instring[loc] in wanted:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
3704
3705
class _PositionToken(Token):
    # Token base whose instances are named after their concrete class and
    # are flagged as able to match without consuming input.
    def __init__(self):
        super(_PositionToken, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = self.__class__.__name__
3712
class GoToColumn(_PositionToken):
    """Token that advances to a given column of the input text; useful
    for tabular report scraping.
    """
    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        # Already in the target column: nothing to skip.
        if col(loc, instring) == self.col:
            return loc

        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        # step over whitespace until the target column (or non-whitespace,
        # or end of input) is reached
        while loc < instrlen and instring[loc].isspace() and col(loc, instring) != self.col:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(new_loc, skipped_text)`` where ``new_loc`` lies in the
        target column; raise ``ParseException`` if ``loc`` is already past it.
        """
        thiscol = col(loc, instring)
        if thiscol > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        newloc = loc + self.col - thiscol
        return newloc, instring[loc: newloc]
3737
3738
class LineStart(_PositionToken):
    r"""Matches when the current position is at the beginning of a line
    within the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """
    def __init__(self):
        super(LineStart, self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl(self, instring, loc, doActions=True):
        # only column 1 counts as the start of a line
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3769
class LineEnd(_PositionToken):
    """Matches when the current position is at the end of a line within
    the parse string
    """
    def __init__(self):
        super(LineEnd, self).__init__()
        # newline must not be skipped as whitespace, or it could never be matched
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Return ``(loc + 1, "\\n")`` at a newline, ``(loc + 1, [])`` at the
        very end of the input; raise ``ParseException`` anywhere else.
        """
        instrlen = len(instring)
        if loc == instrlen:
            return loc + 1, []
        if loc < instrlen and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
3789
class StringStart(_PositionToken):
    """Matches when the current position is at the beginning of the
    parse string
    """
    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        # A non-zero location is still accepted when everything before it
        # is skipped by preParse starting from position 0.
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3804
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string.

    ``parseImpl`` returns an empty token list. When the location is exactly
    ``len(instring)`` the returned location is advanced by one; a location
    already past the end is returned unchanged.

    Raises :class:`ParseException` if any input remains at the current
    location.
    """
    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        strlen = len(instring)
        if loc < strlen:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == strlen:
            return loc + 1, []
        # only loc > strlen remains (the former trailing ``else`` branch
        # could never be reached)
        return loc, []
3821
class WordStart(_PositionToken):
    r"""Matches if the current position is at the beginning of a Word,
    and is not preceded by any character in a given set of
    ``wordChars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """
    def __init__(self, wordChars=printables):
        super(WordStart, self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        # offset 0 always qualifies - there is no preceding character
        if loc == 0:
            return loc, []
        # the previous character is tested first; the current character is
        # only examined when the previous one is not a word character
        if (instring[loc - 1] in self.wordChars
                or instring[loc] not in self.wordChars):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3842
class WordEnd(_PositionToken):
    r"""Matches if the current position is at the end of a Word, and is
    not followed by any character in a given set of ``wordChars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.

    ``parseImpl`` returns the location unchanged with an empty token list,
    and raises :class:`ParseException` when the position is not the end of
    a word.
    """
    def __init__(self, wordChars=printables):
        super(WordEnd, self).__init__()
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        instrlen = len(instring)
        # empty input, or a location at/after the end of the string, is
        # always accepted
        if instrlen == 0 or loc >= instrlen:
            return loc, []
        # At offset 0 there is no preceding character, so no word can end
        # here.  Without this check ``instring[loc - 1]`` would be
        # ``instring[-1]`` and wrap around to the LAST character of the
        # string, wrongly reporting a word end whenever the input happens to
        # finish with a word character.
        if (loc == 0
                or instring[loc] in self.wordChars
                or instring[loc - 1] not in self.wordChars):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3864
3865
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.

    The contained expressions are held in the list ``self.exprs``.
    """
    def __init__(self, exprs, savelist=False):
        super(ParseExpression, self).__init__(savelist)
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, basestring):
            # a lone string becomes a single literal expression
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # wrap any strings in the sequence with the literal class,
            # leaving every other item untouched
            self.exprs = [self._literalStringClass(item) if isinstance(item, basestring) else item
                          for item in exprs]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                # not iterable at all - treat it as a single expression
                self.exprs = [exprs]
        self.callPreparse = False

    def append(self, other):
        """Add ``other`` to the end of the contained expressions, discard
        the cached string representation, and return ``self``."""
        self.exprs.append(other)
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Extends ``leaveWhitespace`` defined in base class, and also invokes ``leaveWhitespace`` on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the original contained expressions are not modified
        copies = [e.copy() for e in self.exprs]
        self.exprs = copies
        for e in copies:
            e.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register ``other`` as ignorable on this expression and pass the
        newly registered ignore expression on to every contained expression.
        A :class:`Suppress` that is already registered is not added again."""
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super(ParseExpression, self).ignore(other)
        for e in self.exprs:
            e.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        # use the base-class representation if it is able to produce one
        try:
            return super(ParseExpression, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        """Streamline this expression and its contained expressions, then
        flatten a directly nested expression of the same class."""
        super(ParseExpression, self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And(And(And(a, b), c), d) to And(a, b, c, d)
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            def can_merge(candidate):
                return (isinstance(candidate, self.__class__)
                        and not candidate.parseAction
                        and candidate.resultsName is None
                        and not candidate.debug)

            first = self.exprs[0]
            if can_merge(first):
                self.exprs = first.exprs[:] + [self.exprs[1]]
                self.strRepr = None
                self.mayReturnEmpty |= first.mayReturnEmpty
                self.mayIndexError |= first.mayIndexError

            last = self.exprs[-1]
            if can_merge(last):
                self.exprs = self.exprs[:-1] + last.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= last.mayReturnEmpty
                self.mayIndexError |= last.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def validate(self, validateTrace=None):
        """Validate every contained expression, passing along the trace
        extended with ``self``, then run ``checkRecursion`` on this
        expression."""
        trace_so_far = [] if validateTrace is None else validateTrace
        tmp = trace_so_far[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion([])

    def copy(self):
        """Return a copy of this expression whose contained expressions
        are themselves copies."""
        ret = super(ParseExpression, self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret

    def _setResultsName(self, name, listAllMatches=False):
        # optional diagnostic: warn when a contained expression already
        # carries a results name of its own
        if __diag__.warn_ungrouped_named_tokens_in_collection:
            template = ("{0}: setting results name {1!r} on {2} expression "
                        "collides with {3!r} on contained expression")
            for e in self.exprs:
                if isinstance(e, ParserElement) and e.resultsName:
                    warnings.warn(template.format("warn_ungrouped_named_tokens_in_collection",
                                                  name,
                                                  type(self).__name__,
                                                  e.resultsName),
                                  stacklevel=3)

        return super(ParseExpression, self)._setResultsName(name, listAllMatches)
3985
3986
class And(ParseExpression):
    """
    Requires all given :class:`ParseExpression` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    An ``Ellipsis`` (``...``) entry in the expression list is replaced by a
    :class:`SkipTo` targeting the expression that follows it; an
    ``Ellipsis`` in the last position raises an ``Exception``.

    An ``And`` may be constructed from an empty list (for instance to be
    filled in afterwards with ``+=``); it then keeps the base-class
    whitespace settings instead of copying them from a first expression.

    Example::

        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        # Marker element named '-'.  Once And.parseImpl has passed one of
        # these, a failure of any later expression is raised as a
        # ParseSyntaxException.
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop, self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__(self, exprs, savelist=True):
        exprs = list(exprs)
        if exprs and Ellipsis in exprs:
            tmp = []
            for i, expr in enumerate(exprs):
                if expr is Ellipsis:
                    if i < len(exprs) - 1:
                        # presumably the ``Empty() + ...`` round trip turns a plain
                        # string into a parser element - verify against ``__add__``
                        skipto_arg = (Empty() + exprs[i + 1]).exprs[-1]
                        tmp.append(SkipTo(skipto_arg)("_skipped*"))
                    else:
                        raise Exception("cannot construct And with sequence ending in ...")
                else:
                    tmp.append(expr)
            exprs[:] = tmp
        super(And, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling follows the first contained expression; guard
        # against an empty expression list, for which ``self.exprs[0]``
        # would raise IndexError
        if self.exprs:
            self.setWhitespaceChars(self.exprs[0].whiteChars)
            self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def streamline(self):
        # collapse any _PendingSkip's
        if self.exprs:
            if any(isinstance(e, ParseExpression) and e.exprs and isinstance(e.exprs[-1], _PendingSkip)
                   for e in self.exprs[:-1]):
                for i, e in enumerate(self.exprs[:-1]):
                    if e is None:
                        continue
                    if (isinstance(e, ParseExpression)
                            and e.exprs and isinstance(e.exprs[-1], _PendingSkip)):
                        # fold the following expression into the pending skip and
                        # mark its old slot for removal
                        e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                        self.exprs[i + 1] = None
                self.exprs = [e for e in self.exprs if e is not None]

        super(And, self).streamline()
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                # from here on, failures become ParseSyntaxExceptions
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        # strings are wrapped in the literal class before being appended
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        return self.append(other)  # And([self, other])

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
            # stop at the first expression that cannot match empty
            if not e.mayReturnEmpty:
                break

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr
4094
4095
class Or(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """
    def __init__(self, exprs, savelist=False):
        super(Or, self).__init__(exprs, savelist)
        # with no alternatives at all, the expression is flagged as able to return empty
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True

    def streamline(self):
        super(Or, self).streamline()
        if __compat__.collect_all_And_tokens:
            self.saveAsList = any(e.saveAsList for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        furthestExcLoc = -1
        furthestExc = None
        candidates = []

        # first pass: trial-parse every alternative, remembering the end
        # location of each success and the furthest-reaching failure
        for e in self.exprs:
            try:
                endLoc = e.tryParse(instring, loc)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestExcLoc:
                    furthestExc = err
                    furthestExcLoc = err.loc
            except IndexError:
                if len(instring) > furthestExcLoc:
                    furthestExc = ParseException(instring, len(instring), e.errmsg, self)
                    furthestExcLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endLoc, e))

        if candidates:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            candidates.sort(key=itemgetter(0), reverse=True)

            if not doActions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                return candidates[0][1]._parse(instring, loc, doActions)

            longest = -1, None
            for trialLoc, trialExpr in candidates:
                if trialLoc <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    actualLoc, toks = trialExpr._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthestExcLoc:
                        furthestExc = err
                        furthestExcLoc = err.loc
                else:
                    if actualLoc >= trialLoc:
                        return actualLoc, toks
                    # didn't match as much as before
                    elif actualLoc > longest[0]:
                        longest = actualLoc, toks

            if longest != (-1, None):
                return longest

        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestExc.msg = self.errmsg
        raise furthestExc

    def __ixor__(self, other):
        # strings are wrapped in the literal class before being appended
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        return self.append(other)  # Or([self, other])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)

    def _setResultsName(self, name, listAllMatches=False):
        # optional diagnostic, only relevant when And tokens are not being collected
        warn_wanted = (not __compat__.collect_all_And_tokens
                       and __diag__.warn_multiple_tokens_in_named_alternation)
        if warn_wanted and any(isinstance(e, And) for e in self.exprs):
            message = ("{0}: setting results name {1!r} on {2} expression "
                       "may only return a single token for an And alternative, "
                       "in future will return the full list of tokens").format(
                "warn_multiple_tokens_in_named_alternation", name, type(self).__name__)
            warnings.warn(message, stacklevel=3)

        return super(Or, self)._setResultsName(name, listAllMatches)
4217
4218
class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__(self, exprs, savelist=False):
        super(MatchFirst, self).__init__(exprs, savelist)
        # with no alternatives at all, the expression is flagged as able to return empty
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True

    def streamline(self):
        super(MatchFirst, self).streamline()
        if __compat__.collect_all_And_tokens:
            self.saveAsList = any(e.saveAsList for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        furthestExcLoc = -1
        furthestExc = None
        for e in self.exprs:
            try:
                # the first alternative that parses wins
                return e._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthestExcLoc:
                    furthestExc = err
                    furthestExcLoc = err.loc
            except IndexError:
                if len(instring) > furthestExcLoc:
                    furthestExc = ParseException(instring, len(instring), e.errmsg, self)
                    furthestExcLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestExc.msg = self.errmsg
        raise furthestExc

    def __ior__(self, other):
        # strings are wrapped in the literal class before being appended
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        return self.append(other)  # MatchFirst([self, other])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)

    def _setResultsName(self, name, listAllMatches=False):
        # optional diagnostic, only relevant when And tokens are not being collected
        warn_wanted = (not __compat__.collect_all_And_tokens
                       and __diag__.warn_multiple_tokens_in_named_alternation)
        if warn_wanted and any(isinstance(e, And) for e in self.exprs):
            message = ("{0}: setting results name {1!r} on {2} expression "
                       "may only return a single token for an And alternative, "
                       "in future will return the full list of tokens").format(
                "warn_multiple_tokens_in_named_alternation", name, type(self).__name__)
            warnings.warn(message, stacklevel=3)

        return super(MatchFirst, self)._setResultsName(name, listAllMatches)
4303
4304
class Each(ParseExpression):
    """Requires all given :class:`ParseExpression` s to be found, but in
    any order. Expressions may be separated by whitespace.

    May be constructed using the ``'&'`` operator.

    Example::

        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )

    prints::

        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__(self, exprs, savelist=True):
        super(Each, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # the expression groups are built on the first call to parseImpl
        self.initExprGroups = True
        self.saveAsList = True

    def streamline(self):
        super(Each, self).streamline()
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        if self.initExprGroups:
            # one-time classification of the contained expressions into
            # optional / repeated / required groups
            self.opt1map = dict((id(e.expr), e) for e in self.exprs if isinstance(e, Optional))
            opt1 = [e.expr for e in self.exprs if isinstance(e, Optional)]
            opt2 = [e for e in self.exprs if e.mayReturnEmpty and not isinstance(e, (Optional, Regex))]
            self.optionals = opt1 + opt2
            self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
            self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
            self.required = [e for e in self.exprs if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
            self.required += self.multirequired
            self.initExprGroups = False

        scanLoc = loc
        pendingReqd = self.required[:]
        pendingOpt = self.optionals[:]
        matchOrder = []

        # trial-parse repeatedly, recording the order in which expressions
        # match, until a full pass finds no expression that matches
        while True:
            candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            failCount = 0
            for e in candidates:
                try:
                    scanLoc = e.tryParse(instring, scanLoc)
                except ParseException:
                    failCount += 1
                else:
                    # record the Optional wrapper, if there is one, rather than its inner expression
                    matchOrder.append(self.opt1map.get(id(e), e))
                    if e in pendingReqd:
                        pendingReqd.remove(e)
                    elif e in pendingOpt:
                        pendingOpt.remove(e)
            if failCount == len(candidates):
                break

        if pendingReqd:
            missing = ", ".join(_ustr(e) for e in pendingReqd)
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e, Optional) and e.expr in pendingOpt]

        # second pass: parse for real, in the order discovered above
        resultlist = []
        for e in matchOrder:
            loc, results = e._parse(instring, loc, doActions)
            resultlist.append(results)

        finalResults = sum(resultlist, ParseResults([]))
        return loc, finalResults

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
4436
4437
class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.

    Wraps a single contained expression, ``self.expr``, which may be
    ``None``; every method here tolerates a missing expression.
    """
    def __init__(self, expr, savelist=False):
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            # turn a plain string into a parser element
            if issubclass(self._literalStringClass, Token):
                expr = self._literalStringClass(expr)
            else:
                expr = self._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # mirror the parsing attributes of the contained expression
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the contained expression.

        Raises :class:`ParseException` if no expression has been defined.
        """
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        else:
            # pass the real input string (previously "") so the exception
            # can report the line and column of the failure
            raise ParseException(instring, loc, self.errmsg, self)

    def leaveWhitespace(self):
        """Disable whitespace skipping on this element and on a copy of
        the contained expression; returns ``self``."""
        self.skipWhitespace = False
        # copy only when there is an expression: calling ``.copy()`` on a
        # missing (None) expression would raise AttributeError
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register ``other`` as ignorable here and pass the newly
        registered ignore expression on to the contained expression. A
        :class:`Suppress` that is already registered is not added again."""
        if isinstance(other, Suppress):
            if other not in self.ignoreExprs:
                super(ParseElementEnhance, self).ignore(other)
                if self.expr is not None:
                    self.expr.ignore(self.ignoreExprs[-1])
        else:
            super(ParseElementEnhance, self).ignore(other)
            if self.expr is not None:
                self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise :class:`RecursiveGrammarException` if this element is
        already present in ``parseElementList``; otherwise continue the
        check in the contained expression."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    def validate(self, validateTrace=None):
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        # use the base-class representation if it is able to produce one
        try:
            return super(ParseElementEnhance, self).__str__()
        except Exception:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
4515
4516
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # by using self.expr._parse and deleting the contents of the returned ParseResults list
        # we keep any named results that were defined in the FollowedBy expression
        lookahead = self.expr._parse(instring, loc, doActions=doActions)[1]
        del lookahead[:]

        # the incoming location is returned, so no input is consumed
        return loc, lookahead
4550
4551
class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

     - expr - expression that must match prior to the current parse
       location
     - retreat - (default= ``None``) - (int) maximum number of characters
       to lookbehind prior to the current parse location

    If the lookbehind expression is a string, Literal, Keyword, or
    a Word or CharsNotIn with a specified exact or maximum length, then
    the retreat parameter is not required. Otherwise, retreat must be
    specified to give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """
    def __init__(self, expr, retreat=None):
        super(PrecededBy, self).__init__(expr)
        # presumably calling the expression with no arguments yields a copy, so
        # that leaveWhitespace() does not alter the caller's expression -
        # confirm against ParserElement.__call__
        self.expr = self.expr().leaveWhitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.exact = False
        # Work out a fixed lookbehind distance from the expression where
        # possible.  ``basestring`` (not ``str``) is tested, as in the other
        # string checks in this module, so that Python 2 unicode strings are
        # recognized as literals too.
        if isinstance(expr, basestring):
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, _PositionToken):
            retreat = 0
            self.exact = True
        # NOTE(review): if none of the branches above applies and the caller
        # gave no retreat, this stays None and the arithmetic in parseImpl
        # will raise TypeError - the docstring requires retreat in that case.
        self.retreat = retreat
        self.errmsg = "not preceded by " + _ustr(expr)
        self.skipWhitespace = False
        # parse action that empties the token list in place (same as ``del t[:]``)
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, doActions=True):
        """Check that ``self.expr`` matches in the text preceding ``loc``.

        Returns ``(loc, tokens)`` with ``loc`` unchanged; raises a parse
        exception when no lookbehind match is found.
        """
        if self.exact:
            # fixed distance: there must be at least ``retreat`` characters
            # before loc, and the expression is parsed from exactly that far back
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
        else:
            # retreat specified a maximum lookbehind window, iterate
            # StringEnd() is appended and only the text up to loc is parsed,
            # so a successful match has to end at the current location
            test_expr = self.expr + StringEnd()
            instring_slice = instring[max(0, loc - self.retreat):loc]
            last_expr = ParseException(instring, loc, self.errmsg)
            # try successively longer lookbehinds, starting one character back
            for offset in range(1, min(loc, self.retreat + 1)+1):
                try:
                    _, ret = test_expr._parse(instring_slice, len(instring_slice) - offset)
                except ParseBaseException as pbe:
                    # remember the most recent failure in case every offset fails
                    last_expr = pbe
                else:
                    break
            else:
                # loop finished without a successful match
                raise last_expr
        return loc, ret
4625
4626
class NotAny(ParseElementEnhance):
    """Negative lookahead on the given parse expression.

    ``NotAny`` verifies that the wrapped expression does *not* match at
    the current position.  It does *not* advance the parsing position
    within the input string, and it does *not* skip over leading
    whitespace.  ``NotAny`` always returns a null token list.  May be
    constructed using the '~' operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Optional(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infixNotation
        boolean_expr = boolean_term + ZeroOrMore((AND | OR) + boolean_term)

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # Set the flag directly instead of calling self.leaveWhitespace():
        # the setting must not be propagated to the contained expressions.
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        # succeed, consuming nothing, only when the expression cannot parse here
        if not self.expr.canParseNext(instring, loc):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def __str__(self):
        # an explicitly assigned name takes precedence over the derived form
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "".join(("~{", _ustr(self.expr), "}"))
            return self.strRepr

        return self.name
4670
class _MultipleMatch(ParseElementEnhance):
    """Shared implementation of repeated matching of a single expression,
    with an optional ``stopOn`` sentinel that ends the repetition.
    """
    def __init__(self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        # a plain string sentinel is promoted to a literal expression
        sentinel = self._literalStringClass(stopOn) if isinstance(stopOn, basestring) else stopOn
        self.stopOn(sentinel)

    def stopOn(self, ender):
        """Set (or clear, with ``None``) the sentinel expression; returns ``self``."""
        if isinstance(ender, basestring):
            ender = self._literalStringClass(ender)
        # store the negated sentinel, used as a guard before each repetition
        if ender is None:
            self.not_ender = None
        else:
            self.not_ender = ~ender
        return self

    def parseImpl(self, instring, loc, doActions=True):
        # bind frequently used callables to locals
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        check_ender = self.not_ender is not None

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if check_ender:
            try_not_ender = self.not_ender.tryParse
            try_not_ender(instring, loc)
        loc, tokens = parse_expr(instring, loc, doActions, callPreParse=False)

        # keep matching until the expression (or the sentinel guard) fails;
        # that failure simply ends the repetition
        try:
            has_ignore_exprs = bool(self.ignoreExprs)
            while True:
                if check_ender:
                    try_not_ender(instring, loc)
                preloc = skip_ignorables(instring, loc) if has_ignore_exprs else loc
                loc, tmptokens = parse_expr(instring, preloc, doActions)
                # accumulate results that carry tokens or named keys
                if tmptokens or tmptokens.haskeys():
                    tokens += tmptokens
        except (ParseException, IndexError):
            pass

        return loc, tokens

    def _setResultsName(self, name, listAllMatches=False):
        # optional diagnostic: warn when the contained expression (or one of
        # its sub-expressions) already carries a results name
        if __diag__.warn_ungrouped_named_tokens_in_collection:
            candidates = [self.expr] + getattr(self.expr, 'exprs', [])
            for contained in candidates:
                if not (isinstance(contained, ParserElement) and contained.resultsName):
                    continue
                message = ("{0}: setting results name {1!r} on {2} expression "
                           "collides with {3!r} on contained expression").format(
                    "warn_ungrouped_named_tokens_in_collection",
                    name,
                    type(self).__name__,
                    contained.resultsName)
                warnings.warn(message, stacklevel=3)

        return super(_MultipleMatch, self)._setResultsName(name, listAllMatches)
4727
4728
class OneOrMore(_MultipleMatch):
    """Repetition of the given expression, one or more times.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default= ``None``) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__(self):
        # an explicitly assigned name takes precedence over the derived form
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "".join(("{", _ustr(self.expr), "}..."))
            return self.strRepr

        return self.name
4763
class ZeroOrMore(_MultipleMatch):
    """Optional repetition of the given expression, zero or more times.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default= ``None``) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to :class:`OneOrMore`
    """
    def __init__(self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # a failure of the "one or more" parse means zero matches: succeed
        # at the original location with no tokens
        try:
            outcome = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            outcome = loc, []
        return outcome

    def __str__(self):
        # an explicitly assigned name takes precedence over the derived form
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "".join(("[", _ustr(self.expr), "]..."))
            return self.strRepr

        return self.name
4793
4794
class _NullToken(object):
    """Placeholder object that is always falsy and whose string form is empty."""
    def __nonzero__(self):
        # Python 2 truth-value hook
        return False
    __bool__ = __nonzero__  # Python 3 spelling of the same hook
    def __str__(self):
        return ""
4801
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.

    Parameters:
     - expr - expression that may be matched at most once; parsing
       continues normally if it does not match
     - default (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    # private sentinel meaning "no default supplied" (lets None be a real default)
    __optionalNotMatched = _NullToken()

    def __init__(self, expr, default=__optionalNotMatched):
        super(Optional, self).__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        contained = self.expr
        try:
            loc, tokens = contained._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            # no match: succeed anyway, yielding the default value if one was given
            fallback = self.defaultValue
            if fallback is self.__optionalNotMatched:
                tokens = []
            elif contained.resultsName:
                # also expose the default under the expression's results name
                tokens = ParseResults([fallback])
                tokens[contained.resultsName] = fallback
            else:
                tokens = [fallback]
        return loc, tokens

    def __str__(self):
        # an explicitly assigned name takes precedence over the derived form
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "".join(("[", _ustr(self.expr), "]"))
            return self.strRepr

        return self.name
4869
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default= ``False``) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default= ``None``) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default= ``None``) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__(self, other, include=False, ignore=None, failOn=None):
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        # a plain string failOn is promoted to a literal expression
        self.failOn = self._literalStringClass(failOn) if isinstance(failOn, basestring) else failOn
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        start = loc
        end = len(instring)
        target_parse = self.expr._parse
        fail_matches = self.failOn.canParseNext if self.failOn is not None else None
        skip_ignored = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        # scan forward one position at a time, looking for the target
        pos = loc
        while pos <= end:
            # NOTE(review): the class docstring says a failOn match makes the
            # SkipTo fail, but this break bypasses the loop's else clause, so
            # no exception is raised here - confirm this is intended.
            if fail_matches is not None and fail_matches(instring, pos):
                break

            if skip_ignored is not None:
                # advance past any number of consecutive ignore expressions
                while True:
                    try:
                        pos = skip_ignored(instring, pos)
                    except ParseBaseException:
                        break

            try:
                # probe only: parse actions are not run while searching
                target_parse(instring, pos, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # target does not start here, try the next position
                pos += 1
            else:
                # target found, stop scanning
                break

        else:
            # scanned past the end of the input without finding the target
            raise ParseException(instring, loc, self.errmsg, self)

        # the result starts out as the text that was skipped over
        skipresult = ParseResults(instring[start:pos])
        loc = pos

        if self.includeMatch:
            # parse the target for real this time and append its tokens
            loc, mat = target_parse(instring, loc, doActions, callPreParse=False)
            skipresult += mat

        return loc, skipresult
4987
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the '<<' operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, '|' has a lower precedence than '<<', so that::

        fwdExpr << a | b | c

    will actually be evaluated as::

        (fwdExpr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwdExpr << (a | b | c)

    Converting to use the '<<=' operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """
    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """Define the contained expression (``fwd << expr``); returns ``self``.

        A plain string is first converted to a literal expression.  The
        parsing attributes of ``other`` are then copied onto this object.
        """
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        self.expr = other
        # drop any cached string form, it described the previous contents
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars(self.expr.whiteChars)
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """``fwd <<= expr`` - same as ``fwd << expr``."""
        return self << other

    def leaveWhitespace(self):
        # only this element's own flag is changed; the contained expression
        # is not touched here
        self.skipWhitespace = False
        return self

    def streamline(self):
        if not self.streamlined:
            # set the flag before descending into the contained expression
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None):
        if validateTrace is None:
            validateTrace = []

        # descend only if this Forward has not been visited on this trace
        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name
        if self.strRepr is not None:
            return self.strRepr

        # Avoid infinite recursion by setting a temporary strRepr
        self.strRepr = ": ..."

        # Use the string representation of main expression.
        retString = '...'
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)[:1000]
            else:
                retString = "None"
        finally:
            # always replace the temporary marker, even if _ustr raised
            self.strRepr = self.__class__.__name__ + ": " + retString
        return self.strRepr

    def copy(self):
        if self.expr is not None:
            return super(Forward, self).copy()
        else:
            # nothing defined yet: return a new Forward that wraps this one
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, listAllMatches=False):
        # optional diagnostic: warn when a results name is set before the
        # Forward has been given a contained expression
        if __diag__.warn_name_set_on_empty_Forward:
            if self.expr is None:
                # format fields: {0} diagnostic name, {1} results name, {2} class name
                warnings.warn("{0}: setting results name {1!r} on {2} expression "
                              "that has no contained expression".format("warn_name_set_on_empty_Forward",
                                                                        name,
                                                                        type(self).__name__),
                              stacklevel=3)

        return super(Forward, self)._setResultsName(name, listAllMatches)
5093
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """
    def __init__(self, expr, savelist=False):
        # savelist is accepted but not forwarded to the base constructor
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
5101
class Combine(TokenConverter):
    """Converter that joins all matched tokens into a single string.
    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other):
        # when tokens must be adjacent, use ParserElement.ignore directly
        # instead of the inherited override
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        # start from a copy with its list part emptied, then add the single
        # joined string as the only token
        combined = tokenlist.copy()
        del combined[:]
        joined_text = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joined_text], modal=self.modalResults)

        # wrap in a list when this element is named and the result has keys
        if self.resultsName and combined.haskeys():
            return [combined]
        return combined
5146
class Group(TokenConverter):
    """Converter that returns the matched tokens as a nested list - useful
    for returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a, b, 100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a, b, 100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        # the whole token list becomes one element of the returned list
        grouped = [tokenlist]
        return grouped
5168
class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list, and also
    as a dictionary: each element can be looked up using the first token
    of that element as its key. Useful for tabular report scraping when
    the first column can be used as an item key.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """
    def __init__(self, expr):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        for i, tok in enumerate(tokenlist):
            size = len(tok)
            if size == 0:
                # nothing to use as a key
                continue

            # the first token of each element is the key; integer keys are
            # converted to their stripped string form
            key = tok[0]
            if isinstance(key, int):
                key = _ustr(key).strip()

            if size == 1:
                # key only: the value is the empty string
                value = ""
            elif size == 2 and not isinstance(tok[1], ParseResults):
                # simple key/value pair
                value = tok[1]
            else:
                # the value is everything after the key
                remainder = tok.copy()  # ParseResults(i)
                del remainder[0]
                is_single_plain = (len(remainder) == 1
                                   and not (isinstance(remainder, ParseResults) and remainder.haskeys()))
                # unwrap a lone value unless it carries named results
                value = remainder[0] if is_single_plain else remainder

            tokenlist[key] = _ParseResultsWithOffset(value, i)

        # wrap in a list when this element itself has a results name
        if self.resultsName:
            return [tokenlist]
        return tokenlist
5235
5236
class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']

    (See also :class:`delimitedList`.)
    """
    def postParse(self, instring, loc, tokenlist):
        # whatever was matched, report no tokens
        return list()

    def suppress(self):
        # already suppressing, so there is nothing further to wrap
        return self
5264
5265
class OnlyOnce(object):
    """Parse-action wrapper that lets the wrapped callable run only once.

    Calls made after the first successful one raise ``ParseException``
    until :meth:`reset` is called.
    """
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        if self.called:
            raise ParseException(s, l, "")
        # mark as called only after the wrapped action returns without raising
        results = self.callable(s, l, t)
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to be called again."""
        self.called = False
5280
def traceParseAction(f):
    """Decorator for debugging parse actions.

    Each time the decorated parse action is called, a line of the form
    ``">>entering method-name(line: '<current_source_line>', <parse_location>, <matched_tokens>)"``
    is written to ``sys.stderr``.  When the parse action completes, a
    ``"<<leaving"`` line is written, giving either the returned value or
    the exception that the parse action raised (the exception is then
    re-raised).

    Example::

        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three arguments are always (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is taken to be the owning instance
            label = paArgs[0].__class__.__name__ + '.' + label
        emit = sys.stderr.write
        emit(">>entering %s(line: '%s', %d, %r)\n" % (label, line(l, s), l, t))
        try:
            ret = f(*paArgs)
        except Exception as exc:
            emit("<<leaving %s (exception: %s)\n" % (label, exc))
            raise
        emit("<<leaving %s (ret: %r)\n" % (label, ret))
        return ret

    # give the wrapper the wrapped action's name, when it has one
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
5325
5326#
5327# global helpers
5328#
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter
    defaults to ','. By default, the list elements and delimiters can
    have intervening whitespace, and comments, but this can be
    overridden by passing ``combine=True`` in the constructor. If
    ``combine`` is set to ``True``, the matching tokens are
    returned as a single token string, with the delimiters included;
    otherwise, the matching tokens are returned as a list of tokens,
    with the delimiters suppressed.

    Example::

        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    # descriptive name of the form "expr [delim expr]..."
    dlName = "".join([_ustr(expr), " [", _ustr(delim), " ", _ustr(expr), "]..."])
    if combine:
        # keep the delimiters and merge everything into a single token
        listExpr = Combine(expr + ZeroOrMore(delim + expr))
    else:
        # drop the delimiters, keeping one token per list element
        listExpr = expr + ZeroOrMore(Suppress(delim) + expr)
    return listExpr.setName(dlName)
5349
def countedArray(expr, intExpr=None):
    """Helper to define a counted list of expressions.

    The pattern matched has the form::

        integer expr expr expr...

    where the leading integer says how many ``expr`` items follow.  The
    result is the grouped list of ``expr`` tokens; the leading count is
    not included in the results.

    ``intExpr``, when given, must be a pyparsing expression producing an
    integer value; a copy of it is used to read the count.  When omitted,
    a run of decimal digits converted with ``int`` is used.

    Example::

        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # once the count is known, define the array body as exactly that many
        # expr's (or an empty group for a zero count), and drop the count token
        count = t[0]
        arrayExpr << (Group(And([expr] * count)) if count else Group(empty))
        return []

    if intExpr is not None:
        # work on a copy so the caller's expression is left untouched
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t: int(t[0]))
    intExpr.setName("arrayLen")
    # must also run during lookahead, since it defines what arrayExpr matches
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    counted = intExpr + arrayExpr
    return counted.setName('(len) ' + _ustr(expr) + '...')
5385
5386def _flatten(L):
5387    ret = []
5388    for i in L:
5389        if isinstance(i, list):
5390            ret.extend(_flatten(i))
5391        else:
5392            ret.append(i)
5393    return ret
5394
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression.  For example::

        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``.  Because the repeat is
    matched as a literal, it will also match the leading ``"1:1"`` in
    ``"1:10"``. If this is not desired, use :class:`matchPreviousExpr`.
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # redefine the repeater each time expr matches, based on its tokens
        if not t:
            rep << Empty()
            return
        if len(t) == 1:
            rep << t[0]
            return
        # several (possibly nested) tokens: match them as consecutive literals
        flatTokens = _flatten(t.asList())
        rep << And(Literal(tok) for tok in flatTokens)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
5424
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression.  For example::

        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``.  Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.

    When the repeated match yields different tokens, a
    :class:`ParseException` is raised at the location of the repeat,
    with a message naming the expected and the found tokens.
    """
    rep = Forward()
    # the repeater parses with a copy of expr, so that the parse actions
    # attached below to expr and to rep stay independent
    e2 = expr.copy()
    rep <<= e2

    def copyTokenToRepeater(s, l, t):
        # remember what expr just matched ...
        matchTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            # ... and require the repeat to produce the very same tokens
            theseTokens = _flatten(t.asList())
            if theseTokens != matchTokens:
                # report against the real input and location, so that the
                # exception's line/column/message are meaningful
                raise ParseException(s, l, "Expected %r, found %r" % (matchTokens, theseTokens))

        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
5453
def _escapeRegexRangeChars(s):
    """Return ``s`` with the characters that are special inside a regex
    ``[...]`` set (backslash, ``^``, ``-``, ``[`` and ``]``) preceded by
    a backslash, and with newline and tab characters replaced by the
    two-character sequences ``\\n`` and ``\\t``.
    """
    escaped = s
    # the backslash has to come first, so that the backslashes added for
    # the other characters are not escaped a second time
    for special in ("\\", "^", "-", "[", "]"):
        escaped = escaped.replace(special, _bslash + special)
    for raw, shown in (("\n", r"\n"), ("\t", r"\t")):
        escaped = escaped.replace(raw, shown)
    return _ustr(escaped)
5461
def oneOf(strs, caseless=False, useRegex=True, asKeyword=False):
    """Helper to quickly define a set of alternative Literals.

    Regardless of the order in which the alternatives are given, longer
    alternatives are tested before shorter ones that would mask them,
    while still returning a first-match style expression for best
    performance.

    Parameters:

     - strs - a string of space-delimited literals, or a collection of
       string literals
     - caseless - (default= ``False``) - treat all literals as
       caseless
     - useRegex - (default= ``True``) - as an optimization, will
       generate a Regex object; otherwise, will generate
       a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if
       creating a :class:`Regex` raises an exception)
     - asKeyword - (default=``False``) - enforce Keyword-style matching on the
       generated expressions

    An empty ``strs`` (or one that is neither a string nor an iterable,
    which also issues a warning) produces a :class:`NoMatch`.

    Example::

        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # a string in the 'caseless' position almost certainly means the choices
    # were passed as separate positional arguments
    if isinstance(caseless, basestring):
        warnings.warn("More than one string argument passed to oneOf, pass "
                      "choices as a list or space-delimited string", stacklevel=2)

    # pick the comparison functions and the element class for this mode
    if caseless:
        def isequal(a, b):
            return a.upper() == b.upper()

        def masks(a, b):
            return b.upper().startswith(a.upper())

        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        def isequal(a, b):
            return a == b

        def masks(a, b):
            return b.startswith(a)

        parseElementClass = Keyword if asKeyword else Literal

    # normalize the choices into a list of symbols
    if isinstance(strs, basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        symbols = []
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                      SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    if not asKeyword:
        # if not producing keywords, need to reorder to take care to avoid masking
        # longer choices with shorter ones; duplicates are dropped along the way
        idx = 0
        while idx < len(symbols) - 1:
            current = symbols[idx]
            for pos, candidate in enumerate(symbols[idx + 1:], idx + 1):
                if isequal(candidate, current):
                    # duplicate of the current symbol - discard it
                    del symbols[pos]
                    break
                elif masks(current, candidate):
                    # current is a prefix of candidate - move candidate ahead of it
                    del symbols[pos]
                    symbols.insert(idx, candidate)
                    break
            else:
                # nothing after current conflicts with it, move on
                idx += 1

    if useRegex and not (caseless or asKeyword):
        try:
            if len(symbols) == len("".join(symbols)):
                # all single characters: a character set is enough
                pattern = "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)
            else:
                pattern = "|".join(re.escape(sym) for sym in symbols)
            return Regex(pattern).setName(' | '.join(symbols))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    alternatives = MatchFirst(parseElementClass(sym) for sym in symbols)
    return alternatives.setName(' | '.join(symbols))
5548
def dictOf(key, value):
    """Helper to easily and clearly define a dictionary from a pattern
    for the keys and a pattern for the values.

    Builds ``Dict(OneOrMore(Group(key + value)))``, so the
    :class:`Dict`, :class:`OneOrMore` and :class:`Group` wrappers are
    nested in the proper order.  The key pattern can include delimiting
    markers or punctuation, as long as they are suppressed, thereby
    leaving the significant key text.  The value pattern can include
    named results, so that the :class:`Dict` results can include named
    token fields.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())

    prints::

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # one grouped key/value pair per dictionary entry
    entry = Group(key + value)
    return Dict(OneOrMore(entry))
5587
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given
    expression.  Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns a string containing the original parsed text.

    If the optional ``asString`` argument is passed as
    ``False``, then the return value is
    a :class:`ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string.  So if the expression passed to
    :class:`originalTextFor` contains expressions with defined
    results names, you must set ``asString`` to ``False`` if you
    want to preserve those results name values.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # empty expressions that just report the location at which they match
    startMarker = Empty().setParseAction(lambda s, loc, t: loc)
    endMarker = startMarker.copy()
    # the end marker must report the location right where expr stopped
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    if not asString:
        def extractText(s, l, t):
            # replace the tokens in place by the source slice, removing the
            # two helper results names but keeping any others
            begin = t.pop('_original_start')
            end = t.pop('_original_end')
            t[:] = [s[begin:end]]
    else:
        extractText = lambda s, l, t: s[t._original_start: t._original_end]

    wrapped.setParseAction(extractText)
    wrapped.ignoreExprs = expr.ignoreExprs
    return wrapped
5629
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.

    Wraps ``expr`` in a :class:`TokenConverter` whose parse action
    returns the first matched token.
    """
    converter = TokenConverter(expr)
    return converter.addParseAction(lambda t: t[0])
5635
def locatedExpr(expr):
    """Helper to decorate a returned token with its starting and ending
    locations in the input string.

    The returned group carries these results names:

     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parseWithTabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # an empty match whose only token is the current parse location
    locator = Empty().setParseAction(lambda s, l, t: l)
    startLocn = locator("locn_start")
    valueExpr = expr("value")
    # the end locator must not skip whitespace, so that it reports where
    # the value actually ended
    endLocn = locator.copy().leaveWhitespace()("locn_end")
    return Group(startLocn + valueExpr + endLocn)
5663
5664
# Ready-made, named instances of the positional expressions, provided
# as module-level convenience constants.
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")
5671
# Grammar for the regex-style '[...]' character set notation understood by
# srange().  Each "single character" alternative yields one character as its
# token; a range yields a group holding its two end characters.

# backslash followed by one punctuation character -> that character
_escapedPunc = Word(_bslash, r"\[]-*.$+^?()~ ", exact=2).setParseAction(lambda s, l, t: t[0][1])
# hex escape (\x##, \X## or \0x##) -> the character with that code.  The digits
# are taken as everything after the x marker; the previous
# t[0].lstrip(r'\0x') stripped a *set* of characters, so it also removed
# leading '0' digits ('\x00' became '') and left an uppercase 'X' in place,
# making int() fail in both cases.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s, l, t: unichr(int(t[0].lower().partition('x')[2], 16)))
# octal escape (\0##) -> the character with that code
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s, l, t: unichr(int(t[0][1:], 8)))
# any of the escapes above, or any one character other than '\' and ']'
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# '[' + optional '^' (results name "negate") + set members (results name "body") + ']'
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group(OneOrMore(_charRange | _singleChar)).setResultsName("body") + "]"
5678
def srange(s):
    r"""Helper to easily define string ranges for use in Word
    construction. Borrows syntax from regexp '[]' string range
    definitions::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:

     - a single character
     - an escaped character with a leading backslash (such as ``\-``
       or ``\]``)
     - an escaped hex character with a leading ``'\x'``
       (``\x21``, which is a ``'!'`` character) (``\0x##``
       is also supported for backwards compatibility)
     - an escaped octal character with a leading ``'\0'``
       (``\041``, which is a ``'!'`` character)
     - a range of any of the above, separated by a dash (``'a-z'``,
       etc.)
     - any combination of the above (``'aeiouy'``,
       ``'a-zA-Z0-9_$'``, etc.)

    If the input cannot be parsed or expanded, an empty string is
    returned instead of raising an exception.
    """
    def _expanded(part):
        # a range arrives as a group of its two end characters; anything
        # else is already a single character
        if isinstance(part, ParseResults):
            first, last = ord(part[0]), ord(part[1])
            return ''.join(unichr(code) for code in range(first, last + 1))
        return part

    try:
        parsed = _reBracketExpr.parseString(s)
        return "".join(_expanded(part) for part in parsed.body)
    except Exception:
        # deliberately best-effort: any failure yields an empty set
        return ""
5710
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at
    a specific column in the input text.

    Returns a parse action that raises :class:`ParseException` unless
    the match location is at column ``n``.
    """
    def verifyCol(instring, loc, tokens):
        if col(loc, instring) == n:
            return
        raise ParseException(instring, loc, "matched token not at column %d" % n)
    return verifyCol
5719
def replaceWith(replStr):
    """Helper method for common parse actions that simply return
    a literal value.  Especially useful when used with
    :class:`transformString<ParserElement.transformString>` ().

    The returned parse action ignores its arguments and returns a new
    one-element list holding ``replStr``.

    Example::

        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    return lambda _instring, _loc, _tokens: [replStr]
5734
def removeQuotes(s, l, t):
    """Helper parse action for removing quotation marks from parsed
    quoted strings: returns the first token without its first and last
    characters.

    Example::

        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]
5749
def tokenMap(func, *args):
    """Helper to define a parse action by mapping a function to all
    elements of a ParseResults list. If any additional args are passed,
    they are forwarded to the given function as additional arguments
    after the token, as in
    ``hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    The returned parse action is named after ``func`` (its ``__name__``,
    else the name of its class, else ``str(func)``).

    Example (compare the last to example in :class:`ParserElement.transformString`)::

        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s, l, t):
        return [func(token, *args) for token in t]

    # give the parse action a recognizable name for debugging output
    try:
        classFallback = getattr(func, '__class__').__name__
        actionName = getattr(func, '__name__', classFallback)
    except Exception:
        actionName = str(func)
    pa.__name__ = actionName

    return pa
5797
# Parse action built with tokenMap: every token is converted with _ustr()
# and then upper-cased.
upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case.
Deprecated in favor of :class:`pyparsing_common.upcaseTokens`"""

# Parse action built with tokenMap: every token is converted with _ustr()
# and then lower-cased.
downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case.
Deprecated in favor of :class:`pyparsing_common.downcaseTokens`"""
5805
def _makeTags(tagStr, xml,
              suppress_LT=Suppress("<"),
              suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

    ``tagStr`` is either the tag name as a string (turned into a
    :class:`Keyword`, caseless unless ``xml`` is true) or an expression
    whose ``name`` is used as the tag name.  ``xml`` selects the strict
    form of attributes (``name="value"`` with double-quoted values);
    otherwise attribute names are lower-cased and values may be quoted,
    unquoted or missing.

    Returns the tuple ``(openTag, closeTag)``.  Both expressions get a
    ``tag`` attribute holding the tag name, and ``openTag`` also gets
    ``tag_body``, a :class:`SkipTo` running up to the closing tag.
    """
    if isinstance(tagStr, basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes)
        attrEntry = tagAttrName + Suppress("=") + tagAttrValue
    else:
        tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printables, excludeChars=">")
        attrEntry = tagAttrName.setParseAction(downcaseTokens) + Optional(Suppress("=") + tagAttrValue)

    # '<' tag attributes... ['/'] '>' ; "empty" is True for a self-closing tag
    openTag = (suppress_LT
               + tagStr("tag")
               + Dict(ZeroOrMore(Group(attrEntry)))
               + Optional("/", default=[False])("empty").setParseAction(lambda s, l, t: t[0] == '/')
               + suppress_GT)
    closeTag = Combine(_L("</") + tagStr + ">", adjacent=False)

    openTag.setName("<%s>" % resname)
    # results-name suffix derived from the tag name, e.g. "ns:tag" -> "NsTag"
    nameSuffix = "".join(resname.replace(":", " ").title().split())
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.addParseAction(lambda t: t.__setitem__("start" + nameSuffix, t.copy()))
    closeTag = closeTag("end" + nameSuffix).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag
5842
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML,
    given a tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Returns the opening and closing tag expressions as a 2-tuple.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    return _makeTags(tagStr, xml=False)
5866
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML,
    given a tag name. Matches tags only in the given upper/lower case.

    Returns the opening and closing tag expressions as a 2-tuple.

    Example: similar to :class:`makeHTMLTags`
    """
    return _makeTags(tagStr, xml=True)
5874
def withAttribute(*args, **attrDict):
    """Helper to create a validating parse action to be used with start
    tags created with :class:`makeXMLTags` or
    :class:`makeHTMLTags`. Use ``withAttribute`` to qualify
    a starting tag with a required attribute value, to avoid false
    matches on common tags such as ``<TD>`` or ``<DIV>``.

    Call ``withAttribute`` with a series of attribute names and
    values. Specify the list of filter attributes names and values as:

     - keyword arguments, as in ``(align="right")``, or
     - as an explicit dict with ``**`` operator, when an attribute
       name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
     - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``

    When positional name-value tuples are given, they are used and any
    keyword arguments are ignored.

    For attribute names with a namespace prefix, you must use the second
    form.  Attribute names are matched insensitive to upper/lower case.

    If just testing for ``class`` (with or without a namespace), use
    :class:`withClass`.

    To verify that the attribute exists, but without specifying a value,
    pass ``withAttribute.ANY_VALUE`` as the value.

    The returned parse action raises :class:`ParseException` when
    a required attribute is missing or has a different value.

    Example::

        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # positional name/value pairs take precedence over keyword arguments
    pairs = args[:] if args else attrDict.items()
    # snapshot the requirements as a list of 2-tuples (also checks that
    # every entry really is a pair)
    attrs = [(k, v) for k, v in pairs]

    def pa(s, l, tokens):
        for attrName, attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s, l, "no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actual = tokens[attrName]
                if actual != attrValue:
                    raise ParseException(s, l, "attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actual, attrValue))
    return pa
# sentinel meaning "the attribute must be present, whatever its value"
withAttribute.ANY_VALUE = object()
5945
def withClass(classname, namespace=''):
    """Simplified version of :class:`withAttribute` when
    matching on a div class - made difficult because ``class`` is
    a reserved word in Python.

    The attribute tested is ``class``, or ``<namespace>:class`` when
    a non-empty ``namespace`` is given.

    Example::

        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr: classname})
5983
# Namespace holding the operator associativity markers that infixNotation
# compares against; each marker is a distinct sentinel object.
opAssoc = SimpleNamespace()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
5987
5988def infixNotation(baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')')):
5989    """Helper method for constructing grammars of expressions made up of
5990    operators working in a precedence hierarchy.  Operators may be unary
5991    or binary, left- or right-associative.  Parse actions can also be
5992    attached to operator expressions. The generated parser will also
5993    recognize the use of parentheses to override operator precedences
5994    (see example below).
5995
5996    Note: if you define a deep operator list, you may see performance
5997    issues when using infixNotation. See
5998    :class:`ParserElement.enablePackrat` for a mechanism to potentially
5999    improve your parser performance.
6000
6001    Parameters:
6002     - baseExpr - expression representing the most basic element for the
6003       nested
6004     - opList - list of tuples, one for each operator precedence level
6005       in the expression grammar; each tuple is of the form ``(opExpr,
6006       numTerms, rightLeftAssoc, parseAction)``, where:
6007
6008       - opExpr is the pyparsing expression for the operator; may also
6009         be a string, which will be converted to a Literal; if numTerms
6010         is 3, opExpr is a tuple of two expressions, for the two
6011         operators separating the 3 terms
6012       - numTerms is the number of terms for this operator (must be 1,
6013         2, or 3)
6014       - rightLeftAssoc is the indicator whether the operator is right
6015         or left associative, using the pyparsing-defined constants
6016         ``opAssoc.RIGHT`` and ``opAssoc.LEFT``.
6017       - parseAction is the parse action to be associated with
6018         expressions matching this operator expression (the parse action
6019         tuple member may be omitted); if the parse action is passed
6020         a tuple or list of functions, this is equivalent to calling
6021         ``setParseAction(*fn)``
6022         (:class:`ParserElement.setParseAction`)
6023     - lpar - expression for matching left-parentheses
6024       (default= ``Suppress('(')``)
6025     - rpar - expression for matching right-parentheses
6026       (default= ``Suppress(')')``)
6027
6028    Example::
6029
6030        # simple example of four-function arithmetic with ints and
6031        # variable names
6032        integer = pyparsing_common.signed_integer
6033        varname = pyparsing_common.identifier
6034
6035        arith_expr = infixNotation(integer | varname,
6036            [
6037            ('-', 1, opAssoc.RIGHT),
6038            (oneOf('* /'), 2, opAssoc.LEFT),
6039            (oneOf('+ -'), 2, opAssoc.LEFT),
6040            ])
6041
6042        arith_expr.runTests('''
6043            5+3*6
6044            (5+3)*6
6045            -2--11
6046            ''', fullDump=False)
6047
6048    prints::
6049
6050        5+3*6
6051        [[5, '+', [3, '*', 6]]]
6052
6053        (5+3)*6
6054        [[[5, '+', 3], '*', 6]]
6055
6056        -2--11
6057        [[['-', 2], '-', ['-', 11]]]
6058    """
6059    # captive version of FollowedBy that does not do parse actions or capture results names
6060    class _FB(FollowedBy):
6061        def parseImpl(self, instring, loc, doActions=True):
6062            self.expr.tryParse(instring, loc)
6063            return loc, []
6064
6065    ret = Forward()
6066    lastExpr = baseExpr | (lpar + ret + rpar)
6067    for i, operDef in enumerate(opList):
6068        opExpr, arity, rightLeftAssoc, pa = (operDef + (None, ))[:4]
6069        termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr
6070        if arity == 3:
6071            if opExpr is None or len(opExpr) != 2:
6072                raise ValueError(
6073                    "if numterms=3, opExpr must be a tuple or list of two expressions")
6074            opExpr1, opExpr2 = opExpr
6075        thisExpr = Forward().setName(termName)
6076        if rightLeftAssoc == opAssoc.LEFT:
6077            if arity == 1:
6078                matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + OneOrMore(opExpr))
6079            elif arity == 2:
6080                if opExpr is not None:
6081                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(lastExpr + OneOrMore(opExpr + lastExpr))
6082                else:
6083                    matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr + OneOrMore(lastExpr))
6084            elif arity == 3:
6085                matchExpr = (_FB(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr)
6086                             + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr)))
6087            else:
6088                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
6089        elif rightLeftAssoc == opAssoc.RIGHT:
6090            if arity == 1:
6091                # try to avoid LR with this extra test
6092                if not isinstance(opExpr, Optional):
6093                    opExpr = Optional(opExpr)
6094                matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
6095            elif arity == 2:
6096                if opExpr is not None:
6097                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(lastExpr + OneOrMore(opExpr + thisExpr))
6098                else:
6099                    matchExpr = _FB(lastExpr + thisExpr) + Group(lastExpr + OneOrMore(thisExpr))
6100            elif arity == 3:
6101                matchExpr = (_FB(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
6102                             + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr))
6103            else:
6104                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
6105        else:
6106            raise ValueError("operator must indicate right or left associativity")
6107        if pa:
6108            if isinstance(pa, (tuple, list)):
6109                matchExpr.setParseAction(*pa)
6110            else:
6111                matchExpr.setParseAction(pa)
6112        thisExpr <<= (matchExpr.setName(termName) | lastExpr)
6113        lastExpr = thisExpr
6114    ret <<= lastExpr
6115    return ret
6116
# Backward-compatible alias: the old name is bound to the very same function object.
operatorPrecedence = infixNotation
"""(Deprecated) Former name of :class:`infixNotation`, will be
dropped in a future release."""

# Quoted-string expressions.  Each regex matches the opening quote followed by
# any run of:
#   - a character other than the quote itself, newline, carriage return or backslash,
#   - a doubled quote ("" or ''), or
#   - a backslash escape: a backslash followed by one character other than 'x',
#     or by 'x' and one or more hex digits.
# The closing quote is a separate literal; Combine joins both pieces into one token.
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").setName("string enclosed in single quotes")
# '+' binds tighter than '|', so this is (double-quoted form) | (single-quoted form),
# using the same two patterns as above.
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
                       | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").setName("quotedString using single or double quotes")
# A quoted string prefixed with 'u'; a copy of quotedString is used so the shared
# module-level expression is not the object embedded here.
# NOTE(review): _L is defined elsewhere in this file - presumably the Literal shorthand.
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
6126
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Build an expression that matches arbitrarily nested lists bracketed
    by an opening and a closing delimiter (``"("`` and ``")"`` unless
    other delimiters are given).

    Parameters:
     - opener - opening character for a nested list
       (default= ``"("``); can also be a pyparsing expression
     - closer - closing character for a nested list
       (default= ``")"``); can also be a pyparsing expression
     - content - expression for items within the nested lists
       (default= ``None``)
     - ignoreExpr - expression for ignoring opening and closing
       delimiters (default= :class:`quotedString`)

    When no ``content`` expression is supplied, a default one is built
    that captures every whitespace-delimited run of characters between
    the delimiters as a separate value.  This is only possible when both
    ``opener`` and ``closer`` are plain strings; otherwise a
    ``ValueError`` is raised.  A ``ValueError`` is also raised when
    ``opener`` and ``closer`` compare equal.

    ``ignoreExpr`` names constructs that may themselves contain opening
    or closing characters which must not count towards the nesting,
    such as quotedString or a comment expression.  Combine several such
    constructs with an :class:`Or` or :class:`MatchFirst`.  The default
    is :class:`quotedString`; pass ``None`` if nothing should be
    ignored.

    Example::

        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener, basestring) and isinstance(closer, basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        whitespace = ParserElement.DEFAULT_WHITE_CHARS
        single_char_delims = len(opener) == 1 and len(closer) == 1

        if single_char_delims and ignoreExpr is not None:
            # one character at a time, so the ignore expression is re-tested at every position
            item_char = ~ignoreExpr + CharsNotIn(opener + closer + whitespace, exact=1)
            content = Combine(OneOrMore(item_char)).setParseAction(lambda t: t[0].strip())
        elif single_char_delims:
            # nothing to ignore: take a whole run of non-delimiter, non-whitespace characters
            content = (empty.copy()
                       + CharsNotIn(opener + closer + whitespace).setParseAction(lambda t: t[0].strip()))
        elif ignoreExpr is not None:
            # multi-character delimiters need negative lookaheads rather than a character class
            item_char = (~ignoreExpr
                         + ~Literal(opener)
                         + ~Literal(closer)
                         + CharsNotIn(whitespace, exact=1))
            content = Combine(OneOrMore(item_char)).setParseAction(lambda t: t[0].strip())
        else:
            item_char = (~Literal(opener)
                         + ~Literal(closer)
                         + CharsNotIn(whitespace, exact=1))
            content = Combine(OneOrMore(item_char)).setParseAction(lambda t: t[0].strip())

    # the result refers to itself, hence the Forward placeholder
    nested = Forward()
    open_marker = Suppress(opener)
    if ignoreExpr is None:
        items = ZeroOrMore(nested | content)
    else:
        items = ZeroOrMore(ignoreExpr | nested | content)
    nested <<= Group(open_marker + items + Suppress(closer))
    nested.setName('nested %s%s expression' % (opener, closer))
    return nested
6230
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
       (multiple statementWithIndentedBlock expressions within a single
       grammar should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond
       the current level; set to False for block of left-most
       statements (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    Side effects:

     - ``indentStack`` is modified in place while parsing (columns are
       pushed on entering a block and popped on leaving it); if the
       block expression fails to match, the stack is restored to the
       contents it had when ``indentedBlock`` was called
     - ``blockStatementExpr.ignore(...)`` is called on the expression
       passed in, so the caller's expression object is modified

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Optional(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # snapshot of the stack as it is right now, used to undo changes after a failed match
    backup_stack = indentStack[:]

    def reset_stack():
        # slice assignment restores the contents of the caller's list object in place
        indentStack[:] = backup_stack

    def checkPeerIndent(s, l, t):
        # parse action: the statement must start in the column on top of the stack
        if l >= len(s): return  # at or past the end of the input - nothing to check
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        # parse action: a new block must start deeper than the current level;
        # its column becomes the new top of the stack
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # parse action: a block may only end at a column already on the stack
        if l >= len(s): return  # at or past the end of the input - nothing to check
        curCol = col(l, s)
        if not(indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        # only a single level is popped per block end
        if curCol < indentStack[-1]:
            indentStack.pop()

    # one or more suppressed line ends; only tab and space are set as the
    # whitespace characters of the LineEnd expression
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress(), stopOn=StringEnd())
    # zero-width markers whose only purpose is to run the column checks above
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        # indented block: INDENT pushes the new column before the statements are matched
        smExpr = Group(Optional(NL)
                       + INDENT
                       + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL), stopOn=StringEnd())
                       + UNDENT)
    else:
        # block at the current level: same as above, without the INDENT check
        smExpr = Group(Optional(NL)
                       + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL), stopOn=StringEnd())
                       + UNDENT)
    # a failed block match must not leave partially pushed columns behind
    smExpr.setFailAction(lambda a, b, c, d: reset_stack())
    # NOTE(review): _bslash is defined elsewhere in this file - presumably a
    # backslash, which would make this skip backslash line continuations; confirm.
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
6356
# Character sets built with srange from the bracketed ranges below.
# NOTE(review): the ranges are spelled "\0xNN"; how srange interprets that
# escape is defined elsewhere in this file - confirm before editing them.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Opening/closing tag expressions for any tag whose name starts with a letter
# and continues with letters, digits, '_' or ':'.
anyOpenTag, anyCloseTag = makeHTMLTags(Word(alphas, alphanums + "_:").setName('any tag'))
# Entity name -> replacement character:
#   gt -> '>', lt -> '<', amp -> '&', nbsp -> ' ', quot -> '"', apos -> "'"
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(), '><& "\''))
# Matches "&<name>;" for the names above; the name is captured in the named group "entity".
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """Parse action that swaps a matched common HTML entity for the character it stands for.

    The entity name is read from the ``entity`` results name of the tokens;
    ``None`` is returned when that name is not in the entity table.
    """
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
6366
# it's easy to get these comment structures wrong - they're very common, so may as well make them available

# "/*" followed by any characters, where a '*' is only consumed when it is not
# followed by '/'; the terminating "*/" is a separate literal joined by Combine.
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
"Comment of the form ``/* ... */``"

# [\s\S] matches any character including newlines; the non-greedy '*?' stops
# at the first "-->".
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form ``<!-- ... -->``"

# Everything matched by ".*" from the current position (may be empty).
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# "//" followed by any characters other than newline; a backslash immediately
# followed by a newline is also accepted, so such a comment continues on the
# next line.
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form ``// ... (to end of line)``"

# '+' binds tighter than '|': (C style comment pattern + "*/") | dblSlashComment
cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/' | dblSlashComment).setName("C++ style comment")
"Comment of either form :class:`cStyleComment` or :class:`dblSlashComment`"

# alias - this is the same expression object as cppStyleComment, not a copy
javaStyleComment = cppStyleComment
"Same as :class:`cppStyleComment`"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form ``# ... (to end of line)``"

# One list item: one or more words of printable characters other than ',';
# spaces/tabs after a word are taken into the item only when they are not
# followed by a comma or the end of the line.
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',')
                                  + Optional(Word(" \t")
                                             + ~Literal(",") + ~LineEnd()))).streamline().setName("commaItem")
# Each list element is optional and defaults to "", so empty fields are kept.
commaSeparatedList = delimitedList(Optional(quotedString.copy() | _commasepitem, default="")).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or
quoted strings, separated by commas.

This expression is deprecated in favor of :class:`pyparsing_common.comma_separated_list`.
"""
6396
6397# some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
    """Here are some common low-level expressions that may be useful in
    jump-starting parser development:

     - numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
       :class:`scientific notation<sci_real>`)
     - common :class:`programming identifiers<identifier>`
     - network addresses (:class:`MAC<mac_address>`,
       :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
     - ISO8601 :class:`dates<iso8601_date>` and
       :class:`datetime<iso8601_datetime>`
     - :class:`UUID<uuid>`
     - :class:`comma-separated list<comma_separated_list>`

    Parse actions:

     - :class:`convertToInteger`
     - :class:`convertToFloat`
     - :class:`convertToDate`
     - :class:`convertToDatetime`
     - :class:`stripHTMLTags`
     - :class:`upcaseTokens`
     - :class:`downcaseTokens`

    Example::

        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')

    prints::

        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    # no "0x" prefix is matched here - the token is hex digits only, converted with int(tok, 16)
    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int, 16))
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    # signed_integer() makes copies, so replacing the parse action on numerator and
    # denominator does not affect signed_integer itself
    fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    # tokens are [numerator, '/', denominator]; divide first by last
    fraction.addParseAction(lambda t: t[0]/t[-1])

    mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', where either part may be
    given alone; returns the sum of the parts (a float when a fraction is
    present, an int for a bare integer)"""
    # the '-' separator is suppressed, leaving only the numeric tokens to add up
    mixed_integer.addParseAction(sum)

    real = Regex(r'[+-]?(?:\d+\.\d*|\.\d+)').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    sci_real = Regex(r'[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional
    scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas + '_', alphanums + '_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
    "IPv4 address (``0.0.0.0 - 255.255.255.255``)"

    # one group of an IPv6 address: 1 to 4 hex digits
    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    # eight groups separated by ':'
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part) * 7).setName("full IPv6 address")
    # "::" with up to seven groups on either side ...
    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6))
                           + "::"
                           + Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6))
                           ).setName("short IPv6 address")
    # ... but fewer than eight groups in total
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # six two-digit hex groups; the backreference \1 forces every separator to be
    # the same character as the first one
    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)

        The returned parse action converts the first token; a string that
        ``strptime`` rejects is reported as a :class:`ParseException`
        carrying the ``ValueError`` message.

        Example::

            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))

        prints::

            [datetime.date(1999, 12, 31)]
        """
        def cvt_fn(s, l, t):
            try:
                return datetime.strptime(t[0], fmt).date()
            except ValueError as ve:
                # turn a conversion failure into a parse failure at this location
                raise ParseException(s, l, str(ve))
        return cvt_fn

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """Helper to create a parse action for converting parsed
        datetime string to Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)

        The returned parse action converts the first token; a string that
        ``strptime`` rejects is reported as a :class:`ParseException`
        carrying the ``ValueError`` message.

        Example::

            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))

        prints::

            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def cvt_fn(s, l, t):
            try:
                return datetime.strptime(t[0], fmt)
            except ValueError as ve:
                # turn a conversion failure into a parse failure at this location
                raise ParseException(s, l, str(ve))
        return cvt_fn

    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
    "ISO8601 date (``yyyy-mm-dd``) - month and day are optional, so ``yyyy`` and ``yyyy-mm`` also match"

    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
    "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"

    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
    "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"

    # matches any opening or closing tag and suppresses it; used by stripHTMLTags below
    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """Parse action to remove HTML tags from web page HTML source

        Only the first token is processed; it is returned with every
        opening and closing tag removed.

        Example::

            # strip HTML links from normal text
            text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
            td, td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end
            print(table_text.parseString(text).body)

        Prints::

            More info at the pyparsing wiki page
        """
        return pyparsing_common._html_stripper.transformString(tokens[0])

    # one list item: words of printable characters other than ',', each optionally
    # followed by spaces/tabs; stops at a comma or at the end of the line
    _commasepitem = Combine(OneOrMore(~Literal(",")
                                      + ~LineEnd()
                                      + Word(printables, excludeChars=',')
                                      + Optional(White(" \t")))).streamline().setName("commaItem")
    # each element is optional and defaults to '', so empty fields are kept
    comma_separated_list = delimitedList(Optional(quotedString.copy()
                                                  | _commasepitem, default='')
                                         ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
    """Parse action to convert tokens to lower case."""
6696
6697
class _lazyclassproperty(object):
    """Descriptor that computes a class-level value on first access and
    caches it per class.

    The wrapped function receives the class and its result is stored in
    the class's ``_intern`` dict under the function's name.  A class that
    would otherwise share the ``_intern`` dict of one of its base classes
    is given a dict of its own first, so subclasses never see values
    computed for a parent.
    """
    def __init__(self, fn):
        self.fn = fn
        # mirror the wrapped function's metadata on the descriptor
        self.__name__ = fn.__name__
        self.__doc__ = fn.__doc__

    def __get__(self, obj, cls):
        owner = type(obj) if cls is None else cls

        # decide whether the owner still needs a cache dict of its own
        needs_own_cache = not hasattr(owner, '_intern')
        if not needs_own_cache:
            for base in owner.__mro__[1:]:
                if owner._intern is getattr(base, '_intern', []):
                    # the dict found on the owner is really an inherited one
                    needs_own_cache = True
                    break
        if needs_own_cache:
            owner._intern = {}

        cache = owner._intern
        key = self.fn.__name__
        if key not in cache:
            cache[key] = self.fn(owner)
        return cache[key]
6714
6715
class unicode_set(object):
    """
    Base class for a set of Unicode characters, exposing the
    language-specific strings ``alphas``, ``nums``, ``alphanums``, and
    ``printables``.
    A subclass declares its characters as a list of ranges in the
    Unicode character set, in a class attribute ``_ranges``, such as::

        _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),]

    Both ends of a range are included.  Sets can also be combined
    through multiple inheritance of other unicode sets::

        class CJK(Chinese, Japanese, Korean):
            pass
    """
    _ranges = []

    @classmethod
    def _get_chars_for_ranges(cls):
        # gather the code points of this class and of every class before
        # unicode_set in its MRO
        code_points = set()
        for klass in cls.__mro__:
            if klass is unicode_set:
                break
            for bounds in klass._ranges:
                # first and last element are used, so a 1-tuple is a single code point
                code_points.update(range(bounds[0], bounds[-1] + 1))
        return [unichr(cp) for cp in sorted(code_points)]

    @_lazyclassproperty
    def printables(cls):
        "every character in this range that is not whitespace"
        return u''.join(ch for ch in cls._get_chars_for_ranges() if not ch.isspace())

    @_lazyclassproperty
    def alphas(cls):
        "every alphabetic character in this range"
        return u''.join(ch for ch in cls._get_chars_for_ranges() if ch.isalpha())

    @_lazyclassproperty
    def nums(cls):
        "every numeric digit character in this range"
        return u''.join(ch for ch in cls._get_chars_for_ranges() if ch.isdigit())

    @_lazyclassproperty
    def alphanums(cls):
        "every alphabetic character followed by every digit character in this range"
        letters = cls.alphas
        digits = cls.nums
        return letters + digits
6761
6762
class pyparsing_unicode(unicode_set):
    """
    A namespace class for defining common language unicode_sets.

    Each nested class is a :class:`unicode_set` whose ``_ranges`` lists
    ``(first, last)`` code point pairs with both ends included; a
    one-element tuple stands for a single code point.
    """
    # the namespace class itself covers code point 32 up to sys.maxunicode
    _ranges = [(32, sys.maxunicode)]

    class Latin1(unicode_set):
        "Unicode set for Latin-1 Unicode Character Range"
        _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),]

    class LatinA(unicode_set):
        "Unicode set for Latin-A Unicode Character Range"
        _ranges = [(0x0100, 0x017f),]

    class LatinB(unicode_set):
        "Unicode set for Latin-B Unicode Character Range"
        _ranges = [(0x0180, 0x024f),]

    class Greek(unicode_set):
        "Unicode set for Greek Unicode Character Ranges"
        # (0x1f59,), (0x1f5b,) and (0x1f5d,) are single code points
        _ranges = [
            (0x0370, 0x03ff), (0x1f00, 0x1f15), (0x1f18, 0x1f1d), (0x1f20, 0x1f45), (0x1f48, 0x1f4d),
            (0x1f50, 0x1f57), (0x1f59,), (0x1f5b,), (0x1f5d,), (0x1f5f, 0x1f7d), (0x1f80, 0x1fb4), (0x1fb6, 0x1fc4),
            (0x1fc6, 0x1fd3), (0x1fd6, 0x1fdb), (0x1fdd, 0x1fef), (0x1ff2, 0x1ff4), (0x1ff6, 0x1ffe),
        ]

    class Cyrillic(unicode_set):
        "Unicode set for Cyrillic Unicode Character Range"
        _ranges = [(0x0400, 0x04ff)]

    class Chinese(unicode_set):
        "Unicode set for Chinese Unicode Character Range"
        _ranges = [(0x4e00, 0x9fff), (0x3000, 0x303f),]

    class Japanese(unicode_set):
        "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"
        # placeholder: the combined ranges are assigned right after the
        # pyparsing_unicode class body, once the nested classes below exist
        _ranges = []

        class Kanji(unicode_set):
            "Unicode set for Kanji Unicode Character Range"
            _ranges = [(0x4E00, 0x9Fbf), (0x3000, 0x303f),]

        class Hiragana(unicode_set):
            "Unicode set for Hiragana Unicode Character Range"
            _ranges = [(0x3040, 0x309f),]

        class Katakana(unicode_set):
            "Unicode set for Katakana  Unicode Character Range"
            _ranges = [(0x30a0, 0x30ff),]

    class Korean(unicode_set):
        "Unicode set for Korean Unicode Character Range"
        _ranges = [(0xac00, 0xd7af), (0x1100, 0x11ff), (0x3130, 0x318f), (0xa960, 0xa97f), (0xd7b0, 0xd7ff), (0x3000, 0x303f),]

    # no _ranges of its own: the characters come from the three base classes
    class CJK(Chinese, Japanese, Korean):
        "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"
        pass

    class Thai(unicode_set):
        "Unicode set for Thai Unicode Character Range"
        _ranges = [(0x0e01, 0x0e3a), (0x0e3f, 0x0e5b),]

    class Arabic(unicode_set):
        "Unicode set for Arabic Unicode Character Range"
        _ranges = [(0x0600, 0x061b), (0x061e, 0x06ff), (0x0700, 0x077f),]

    class Hebrew(unicode_set):
        "Unicode set for Hebrew Unicode Character Range"
        _ranges = [(0x0590, 0x05ff),]

    class Devanagari(unicode_set):
        "Unicode set for Devanagari Unicode Character Range"
        _ranges = [(0x0900, 0x097f), (0xa8e0, 0xa8ff)]
6836
# Japanese is the concatenation of its three nested script sets; this is done
# here, once the nested classes are available as attributes of Japanese
pyparsing_unicode.Japanese._ranges = (
    pyparsing_unicode.Japanese.Kanji._ranges
    + pyparsing_unicode.Japanese.Hiragana._ranges
    + pyparsing_unicode.Japanese.Katakana._ranges
)

# register native-language aliases, so that each character set can also be
# reached through an attribute spelled in its own script
if PY_3:
    for _uset_owner, _uset_alias, _uset_target in (
        (pyparsing_unicode, u"العربية", pyparsing_unicode.Arabic),
        (pyparsing_unicode, u"中文", pyparsing_unicode.Chinese),
        (pyparsing_unicode, u"кириллица", pyparsing_unicode.Cyrillic),
        (pyparsing_unicode, u"Ελληνικά", pyparsing_unicode.Greek),
        (pyparsing_unicode, u"עִברִית", pyparsing_unicode.Hebrew),
        (pyparsing_unicode, u"日本語", pyparsing_unicode.Japanese),
        (pyparsing_unicode.Japanese, u"漢字", pyparsing_unicode.Japanese.Kanji),
        (pyparsing_unicode.Japanese, u"カタカナ", pyparsing_unicode.Japanese.Katakana),
        (pyparsing_unicode.Japanese, u"ひらがな", pyparsing_unicode.Japanese.Hiragana),
        (pyparsing_unicode, u"한국어", pyparsing_unicode.Korean),
        (pyparsing_unicode, u"ไทย", pyparsing_unicode.Thai),
        (pyparsing_unicode, u"देवनागरी", pyparsing_unicode.Devanagari),
    ):
        setattr(_uset_owner, _uset_alias, _uset_target)
    # do not leave the loop temporaries behind in the module namespace
    del _uset_owner, _uset_alias, _uset_target
6855
6856
class pyparsing_test:
    """
    namespace class for classes useful in writing unit tests
    """

    class reset_pyparsing_context:
        """
        Context manager to be used when writing unit tests that modify pyparsing config values:
         - packrat parsing
         - default whitespace characters.
         - default keyword characters
         - literal string auto-conversion class
         - __diag__ settings
         - __compat__ settings (collect_all_And_tokens)

        Example:
            with reset_pyparsing_context():
                # test that literals used to construct a grammar are automatically suppressed
                ParserElement.inlineLiteralsUsing(Suppress)

                term = Word(alphas) | Word(nums)
                group = Group('(' + term[...] + ')')

                # assert that the '()' characters are not included in the parsed tokens
                self.assertParseAndCheckList(group, "(abc 123 def)", ['abc', '123', 'def'])

            # after exiting context manager, literals are converted to Literal expressions again
        """

        def __init__(self):
            # snapshot of the global settings, filled in by save()
            self._save_context = {}

        def save(self):
            """
            Record the current pyparsing global settings so that restore() can
            put them back. Returns self, so the call can be chained.
            """
            self._save_context["default_whitespace"] = ParserElement.DEFAULT_WHITE_CHARS
            self._save_context["default_keyword_chars"] = Keyword.DEFAULT_KEYWORD_CHARS
            self._save_context[
                "literal_string_class"
            ] = ParserElement._literalStringClass
            self._save_context["packrat_enabled"] = ParserElement._packratEnabled
            self._save_context["packrat_parse"] = ParserElement._parse
            self._save_context["__diag__"] = {
                name: getattr(__diag__, name) for name in __diag__._all_names
            }
            self._save_context["__compat__"] = {
                "collect_all_And_tokens": __compat__.collect_all_And_tokens
            }
            return self

        def restore(self):
            """
            Reset the pyparsing global settings to the values recorded by the
            most recent save(). save() must have been called first.
            """
            saved = self._save_context

            # only reset the whitespace characters if they were actually changed
            if ParserElement.DEFAULT_WHITE_CHARS != saved["default_whitespace"]:
                ParserElement.setDefaultWhitespaceChars(saved["default_whitespace"])
            Keyword.DEFAULT_KEYWORD_CHARS = saved["default_keyword_chars"]
            ParserElement.inlineLiteralsUsing(saved["literal_string_class"])
            for name, value in saved["__diag__"].items():
                setattr(__diag__, name, value)
            ParserElement._packratEnabled = saved["packrat_enabled"]
            ParserElement._parse = saved["packrat_parse"]
            # restore each saved __compat__ flag to its saved *value*; assigning the
            # whole saved dict to the flag would leave it permanently truthy
            for name, value in saved["__compat__"].items():
                setattr(__compat__, name, value)

        def __enter__(self):
            return self.save()

        def __exit__(self, *args):
            # restore() returns None, so an exception raised in the with-body propagates
            return self.restore()

    class TestParseResultsAsserts:
        """
        A mixin class to add parse results assertion methods to normal unittest.TestCase classes.
        """
        def assertParseResultsEquals(
            self, result, expected_list=None, expected_dict=None, msg=None
        ):
            """
            Unit test assertion to compare a ParseResults object with an optional expected_list,
            and compare any defined results names with an optional expected_dict.
            """
            if expected_list is not None:
                self.assertEqual(expected_list, result.asList(), msg=msg)
            if expected_dict is not None:
                self.assertEqual(expected_dict, result.asDict(), msg=msg)

        def assertParseAndCheckList(
            self, expr, test_string, expected_list, msg=None, verbose=True
        ):
            """
            Convenience wrapper assert to test a parser element and input string, and assert that
            the resulting ParseResults.asList() is equal to the expected_list.
            If verbose is true, the dump() of the parsed results is printed first.
            """
            result = expr.parseString(test_string, parseAll=True)
            if verbose:
                print(result.dump())
            self.assertParseResultsEquals(result, expected_list=expected_list, msg=msg)

        def assertParseAndCheckDict(
            self, expr, test_string, expected_dict, msg=None, verbose=True
        ):
            """
            Convenience wrapper assert to test a parser element and input string, and assert that
            the resulting ParseResults.asDict() is equal to the expected_dict.
            If verbose is true, the dump() of the parsed results is printed first.
            """
            result = expr.parseString(test_string, parseAll=True)
            if verbose:
                print(result.dump())
            self.assertParseResultsEquals(result, expected_dict=expected_dict, msg=msg)

        def assertRunTestResults(
            self, run_tests_report, expected_parse_results=None, msg=None
        ):
            """
            Unit test assertion to evaluate output of ParserElement.runTests(). If a list of
            list-dict tuples is given as the expected_parse_results argument, then these are zipped
            with the report tuples returned by runTests and evaluated using assertParseResultsEquals.
            Finally, asserts that the overall runTests() success value is True.

            :param run_tests_report: tuple(bool, [tuple(str, ParseResults or Exception)]) returned from runTests
            :param expected_parse_results (optional): [tuple(str, list, dict, Exception)]
            :param msg (optional): failure message used when an entry carries no message of its own
            """
            run_test_success, run_test_results = run_tests_report

            if expected_parse_results is not None:
                # reports and expectations are paired positionally; zip() stops at the shorter one
                for rpt, expected in zip(run_test_results, expected_parse_results):
                    test_string, result = rpt[0], rpt[1]
                    # expected should be a tuple containing a list and/or a dict or an exception,
                    # and optional failure message string
                    # an empty tuple will skip any result validation
                    fail_msg = next(
                        (exp for exp in expected if isinstance(exp, str)), None
                    )
                    expected_exception = next(
                        (
                            exp
                            for exp in expected
                            if isinstance(exp, type) and issubclass(exp, Exception)
                        ),
                        None,
                    )
                    if expected_exception is not None:
                        # runTests reports a failure as an exception instance; re-raise it
                        # so that assertRaises can check its type
                        with self.assertRaises(expected_exception, msg=fail_msg or msg):
                            if isinstance(result, Exception):
                                raise result
                    else:
                        expected_list = next(
                            (exp for exp in expected if isinstance(exp, list)), None
                        )
                        expected_dict = next(
                            (exp for exp in expected if isinstance(exp, dict)), None
                        )
                        if (expected_list, expected_dict) != (None, None):
                            self.assertParseResultsEquals(
                                result,
                                expected_list=expected_list,
                                expected_dict=expected_dict,
                                msg=fail_msg or msg,
                            )
                        else:
                            # warning here maybe?
                            print("no validation for {!r}".format(test_string))

            # do this last, in case some specific test results can be reported instead
            self.assertTrue(
                run_test_success, msg=msg if msg is not None else "failed runTests"
            )

        @contextmanager
        def assertRaisesParseException(self, exc_type=ParseException, msg=None):
            """
            Context manager asserting that the enclosed code raises exc_type
            (ParseException by default); a thin wrapper around assertRaises.
            """
            with self.assertRaises(exc_type, msg=msg):
                yield
7036
7037
if __name__ == "__main__":
    # Demo run when the module is executed as a script: builds a tiny SQL
    # SELECT grammar and exercises runTests() on it and on a few
    # pyparsing_common expressions.

    # keywords, matched without regard to case
    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # column names: dotted identifiers combined into one token and upper-cased
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = "*" | columnNameList

    # table names follow the same dotted-identifier form as column names
    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    simpleSQL = (
        selectToken("command")
        + columnSpec("columns")
        + fromToken
        + tableNameList("tables")
    )

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # the general-purpose number expression
    pyparsing_common.number.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    # attach a parse action that maps each matched uuid token through uuid.UUID
    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)
7108