1# module pyparsing.py
2#
3# Copyright (c) 2003-2008  Paul T. McGuire
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23#
24
25__doc__ = """
26pyparsing module - Classes and methods to define and execute parsing grammars
27
28The pyparsing module is an alternative approach to creating and executing simple grammars,
29vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
30don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
31provides a library of classes that you use to construct the grammar directly in Python.
32
33Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
34
35    from pyparsing import Word, alphas
36
37    # define grammar of a greeting
38    greet = Word( alphas ) + "," + Word( alphas ) + "!"
39
40    hello = "Hello, World!"
    print(hello, "->", greet.parseString(hello))
42
43The program outputs the following::
44
45    Hello, World! -> ['Hello', ',', 'World', '!']
46
47The Python representation of the grammar is quite readable, owing to the self-explanatory
48class names, and the use of '+', '|' and '^' operators.
49
50The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
51object with named attributes.
52
53The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
54 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
55 - quoted strings
56 - embedded comments
57"""
58
# Module metadata: release number, release timestamp and author contact.
__version__ = "1.5.0"
__versionTime__ = "28 May 2008 10:05"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
62
63import copy
64import re
65import string
66import sys
67import warnings
68import xml.sax.saxutils
69from weakref import ref as wkref
70
71import sre_constants
72
# Names exported by ``from <this module> import *``: the capitalised entries
# come first, followed by the lower-case helper names.
__all__ = [
    'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
    'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
    'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
    'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
    'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
    'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
    'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
    'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
    'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
    'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
    'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
    'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
    'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
    'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
    'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
    'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
    'indentedBlock',
]
92
93
94"""
95Detect if we are running version 3.X and make appropriate changes
96Robert A. Clark
97"""
# Module-private alias for sys.maxsize.
# NOTE(review): the string block just above still talks about detecting
# Python 3.X, but no version check is visible here any more - confirm and
# consider rewording it.
_MAX_INT = sys.maxsize
99
100
101def _str2dict(strg):
102    return {c: 0 for c in strg}
103    # ~ return set( [c for c in strg] )
104
105
106class _Constants:
107    pass
108
109
# Character-set strings built from the standard ``string`` module.
nums = string.digits
alphas = string.ascii_letters  # lower-case letters followed by upper-case letters
alphanums = alphas + nums
hexnums = nums + "ABCDEFabcdef"
_bslash = "\\"
# every printable character that is not whitespace
printables = "".join(ch for ch in string.printable if ch not in string.whitespace)
116
117
class ParseBaseException(Exception):
    """Base exception class for all parsing runtime exceptions.

    Stored attributes:
     - loc - the location value passed to the constructor (default 0)
     - msg - the error message
     - pstr - the string the error refers to ("" when only a message was given)
     - parserElement - the elem value passed to the constructor

    The derived attributes lineno, col/column and line are computed on
    demand by __getattr__.
    """
    __slots__ = ("loc", "msg", "pstr", "parserElement")

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        """Store the error details.

        When msg is omitted, the first argument is taken to be the message
        itself and pstr is stored as the empty string.
        """
        self.loc = loc
        self.msg, self.pstr = (pstr, "") if msg is None else (msg, pstr)
        self.parserElement = elem

    def __getattr__(self, aname):
        """Compute the derived attributes; supported names are:
            - lineno - returns the line number of the exception text
            - col (or column) - returns the column number of the exception text
            - line - returns the line containing the exception text
           Any other name raises AttributeError.
        """
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        if aname in ("col", "column"):
            return col(self.loc, self.pstr)
        if aname == "line":
            return line(self.loc, self.pstr)
        raise AttributeError(aname)

    def __str__(self):
        """Return the message followed by the character, line and column position."""
        fields = (self.msg, self.loc, self.lineno, self.column)
        return "%s (at char %d), (line:%d, col:%d)" % fields

    def __repr__(self):
        """Return the same text as str()."""
        return self.__str__()

    def markInputline(self, markerString=">!<"):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.

           An empty markerString leaves the line unmarked.  The result has
           leading and trailing whitespace stripped.
        """
        text = self.line
        split_at = self.column - 1
        if markerString:
            text = text[:split_at] + markerString + text[split_at:]
        return text.strip()
165
166
class ParseException(ParseBaseException):
    """Exception thrown when parse expressions don't match the input.

       It adds nothing to ParseBaseException; the attributes supported by
       name (computed by the base class) are:
        - lineno - returns the line number of the exception text
        - col (or column) - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
174
175
class ParseFatalException(ParseBaseException):
    """User-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately.

       It adds no behaviour of its own to ParseBaseException.
    """
179
180
class ParseSyntaxException(ParseFatalException):
    """Just like ParseFatalException, but thrown internally when an
       ErrorStop indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found.

       It is constructed from an existing exception object, whose details
       it copies."""

    def __init__(self, pe):
        """Copy pstr, loc, msg and parserElement from the exception pe."""
        super().__init__(pstr=pe.pstr, loc=pe.loc, msg=pe.msg, elem=pe.parserElement)
188
189
class RecursiveGrammarException(Exception):
    """Exception thrown by validate() if the grammar could be improperly recursive.

    The constructor argument is kept on the parseElementTrace attribute and
    is rendered by str().
    """

    def __init__(self, parseElementList):
        """Store parseElementList as the parseElementTrace attribute."""
        self.parseElementTrace = parseElementList

    def __str__(self):
        """Return "RecursiveGrammarException: " followed by str() of the trace."""
        # Wrap the trace in a 1-tuple: with a bare right-hand operand the %
        # operator would unpack a tuple-valued trace into separate format
        # arguments, giving the wrong text or raising TypeError.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
198
199
200class _ParseResultsWithOffset:
201    def __init__(self, p1, p2):
202        self.tup = (p1, p2)
203
204    def __getitem__(self, i):
205        return self.tup[i]
206
207    def __repr__(self):
208        return repr(self.tup)
209
210
class ParseResults:
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by results name used as a key (results["<resultsName>"])
       - by attribute (results.<resultsName>)

       Internally the tokens are kept in a plain list, and the named results
       in a dict mapping each name to a list of (value, position) entries.
       """
    __slots__ = ("__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__")

    def __new__(cls, toklist, name=None, asList=True, modal=True):
        """Return toklist itself when it already is a ParseResults, else a blank instance."""
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        # read by __init__, so that an existing instance passed back through
        # the constructor is not initialised a second time
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, toklist, name=None, asList=True, modal=True):
        """Set up the token list and, when name is given, register a named result.

        - toklist - a list of tokens (copied) or a single token (wrapped in a list)
        - name - optional results name for the token(s)
        - asList - store the named value as a nested ParseResults instead of the bare token
        - modal - if false, name is recorded as accumulating, so that lookups
          return every value stored under it instead of only the last one
        """
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name, int):
                name = str(name)
            self.__name = name
            if toklist not in (None, '', []):
                if isinstance(toklist, str):
                    toklist = [toklist]
                if asList:
                    if isinstance(toklist, ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(), -1)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), -1)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError, TypeError):
                        self[name] = toklist

    def __getitem__(self, i):
        """Return a token (int or slice index) or a named result (any other key).

        For a named result the last stored value is returned, unless the name
        is accumulating, in which case all values come back in a ParseResults.
        """
        if isinstance(i, (int, slice)):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([v[0] for v in self.__tokdict[i]])

    def __setitem__(self, k, v):
        """Replace the token at int index k, or append v to the entries named k."""
        if isinstance(v, _ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k, list()) + [v]
            sub = v[0]
        elif isinstance(k, int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k, list()) + [_ParseResultsWithOffset(v, 0)]
            sub = v
        if isinstance(sub, ParseResults):
            # weak reference, so that a child does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__(self, i):
        """Delete tokens (int or slice index) or a named result (any other key)."""
        if isinstance(i, (int, slice)):
            mylen = len(self.__toklist)
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__(self, k):
        """Return whether k is a results name (tokens are not searched)."""
        return k in self.__tokdict

    def __len__(self):
        """Return the number of tokens."""
        return len(self.__toklist)

    def __bool__(self):
        """Return True when there is at least one token."""
        return len(self.__toklist) > 0

    __nonzero__ = __bool__

    def __iter__(self):
        """Iterate over the tokens."""
        return iter(self.__toklist)

    def __reversed__(self):
        """Iterate over the tokens in reverse order."""
        return iter(reversed(self.__toklist))

    def keys(self):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop(self, index=-1):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given defaultValue or None if no
           defaultValue is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def items(self):
        """Returns all named result keys and values as a list of tuples."""
        return [(k, self[k]) for k in self.__tokdict]

    def values(self):
        """Returns all named result values (the last value stored for each name)."""
        return [v[-1][0] for v in self.__tokdict.values()]

    def __getattr__(self, name):
        """Look up name as a results name; unknown names give "" rather than an error."""
        if name not in self.__slots__:
            if name in self.__tokdict:
                if name not in self.__accumNames:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([v[0] for v in self.__tokdict[name]])
            else:
                return ""
        return None

    def __add__(self, other):
        """Return a new ParseResults holding the contents of self followed by other."""
        ret = self.copy()
        ret += other
        return ret

    def __iadd__(self, other):
        """Append other's tokens and named results to this object, in place."""
        if other.__tokdict:
            offset = len(self.__toklist)
            # Negative positions map to the current end of the token list.
            # A conditional expression is required here: the and/or idiom
            # falls through to a + offset when offset is 0, which would
            # leave the position negative.
            addoffset = (lambda a: offset if a < 0 else a + offset)
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
                              for (k, vlist) in otheritems for v in vlist]
            for k, v in otherdictitems:
                self[k] = v
                if isinstance(v[0], ParseResults):
                    v[0].__parent = wkref(self)
        self.__toklist += other.__toklist
        self.__accumNames.update(other.__accumNames)
        return self

    def __repr__(self):
        """Return the repr of the token list and of the name dict, as a pair."""
        return f"({repr(self.__toklist)}, {repr(self.__tokdict)})"

    def __str__(self):
        """Return the tokens formatted like a list, with nested results shown via str()."""
        rendered = (str(tok) if isinstance(tok, ParseResults) else repr(tok)
                    for tok in self.__toklist)
        return "[" + ", ".join(rendered) + "]"

    def _asStringList(self, sep=''):
        """Return the tokens, flattened, as a list of strings with sep between top-level tokens."""
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance(item, ParseResults):
                out += item._asStringList()
            else:
                out.append(str(item))
        return out

    def asList(self):
        """Returns the parse results as a nested list of matching tokens."""
        out = []
        for res in self.__toklist:
            if isinstance(res, ParseResults):
                out.append(res.asList())
            else:
                out.append(res)
        return out

    def asDict(self):
        """Returns the named parse results as dictionary."""
        return dict(self.items())

    def copy(self):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults(self.__toklist)
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update(self.__accumNames)
        ret.__name = self.__name
        return ret

    def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

        - doctag - tag for the outermost element (defaults to this object's name, else "ITEM")
        - namedItemsOnly - leave out tokens that have no results name
        - indent - leading whitespace for the outermost element
        - formatted - if false, emit no newlines or indentation
        """
        nl = "\n"
        out = []
        # maps token position -> results name
        namedItems = {v[1]: k for (k, vlist) in self.__tokdict.items() for v in vlist}
        nextLevelIndent = indent + "  "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [nl, indent, "<", selfTag, ">"]

        worklist = self.__toklist
        for i, res in enumerate(worklist):
            if isinstance(res, ParseResults):
                if i in namedItems:
                    out += [res.asXML(
                        namedItems[i],
                        namedItemsOnly and doctag is None,
                        nextLevelIndent,
                        formatted)]
                else:
                    out += [res.asXML(
                        None,
                        namedItemsOnly and doctag is None,
                        nextLevelIndent,
                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = xml.sax.saxutils.escape(str(res))
                out += [nl, nextLevelIndent, "<", resTag, ">", xmlBodyText, "</", resTag, ">"]

        out += [nl, indent, "</", selfTag, ">"]
        return "".join(out)

    def __lookup(self, sub):
        """Return the results name under which the object sub is stored, or None."""
        for k, vlist in self.__tokdict.items():
            for v, _loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression.

        The name comes from, in order: this object's own name, the name its
        parent stores it under, or the only results name defined here when
        there is exactly one token and that name's position is 0 or -1.
        Returns None when none of these apply.
        """
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            # compare with None: a parent holding no tokens is falsy but
            # still a valid parent
            if par is not None:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1
                and len(self.__tokdict) == 1
                # dict views cannot be subscripted, so take the single entry
                # through an iterator
                and next(iter(self.__tokdict.values()))[0][1] in (0, -1)):
            return next(iter(self.__tokdict))
        else:
            return None

    def dump(self, indent='', depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append(indent+str(self.asList()))
        keys = sorted(self.items())
        for k, v in keys:
            if out:
                out.append('\n')
            out.append("{}{}- {}: ".format(indent, ('  '*depth), k))
            if isinstance(v, ParseResults):
                if v.keys():
                    out.append(v.dump(indent, depth+1))
                else:
                    out.append(str(v))
            else:
                out.append(str(v))
        return "".join(out)

    # add support for pickle protocol
    def __getnewargs__(self):
        """Return the argument tuple for __new__ when the object is rebuilt.

        __new__ has a required toklist parameter, so without this hook
        copy.copy() and pickle protocols 2 and above cannot recreate the
        object.  The real contents are restored by __setstate__.
        """
        return (None,)

    def __getstate__(self):
        """Return the token list plus (name dict copy, parent, accumulating names, name)."""
        # resolve the weak reference; a dead or missing parent is stored as None
        parent = self.__parent() if self.__parent is not None else None
        return (self.__toklist, (
            self.__tokdict.copy(),
            parent,
            self.__accumNames,
            self.__name))

    def __setstate__(self, state):
        """Restore the attributes from a tuple produced by __getstate__."""
        self.__toklist = state[0]
        self.__tokdict, par, inAccumNames, self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None
        # __init__ is not run on this path; clear the flag set by __new__ so
        # that ParseResults(obj) later on does not re-initialise the object
        self.__doinit = False
554
555
def col(loc, strg):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    When loc points at a newline character the result is 1; otherwise it is
    the distance from the last newline before loc.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing <TAB>s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    return loc - strg.rfind("\n", 0, loc)
567
568
def lineno(loc, strg):
    """Returns current line number within a string, counting newlines as line separators.
    The first line is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing <TAB>s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    newlines_before = strg.count("\n", 0, loc)
    return 1 + newlines_before
580
581
def line(loc, strg):
    """Returns the line of text containing loc within a string, counting newlines as line separators.

    The returned text excludes the newline that ends the line.  When loc
    points at a newline character, the line ended by that newline is returned.
    """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() signals "not found" with -1, so index 0 is a genuine match:
    # it occurs when the string starts with a newline and loc is 0
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    return strg[lastCR+1:]
591
592
def _defaultStartDebugAction(instring, loc, expr):
    """Print the expression about to be tried, with its location and (line,col) position."""
    where = f"Match {expr!s} at loc {loc!s}"
    print(where + "(%d,%d)" % (lineno(loc, instring), col(loc, instring)))
595
596
597def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks):
598    print("Matched " + str(expr) + " -> " + str(toks.asList()))
599
600
601def _defaultExceptionDebugAction(instring, loc, expr, exc):
602    print("Exception raised:" + str(exc))
603
604
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments, ignores them and returns None.
    """
    return None
607
608
609class ParserElement:
610    """Abstract base level parser element class."""
611    DEFAULT_WHITE_CHARS = " \n\t\r"
612
613    def setDefaultWhitespaceChars(chars):
614        """Overrides the default whitespace chars
615        """
616        ParserElement.DEFAULT_WHITE_CHARS = chars
617    setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
618
619    def __init__(self, savelist=False):
620        self.parseAction = list()
621        self.failAction = None
622        # ~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
623        self.strRepr = None
624        self.resultsName = None
625        self.saveAsList = savelist
626        self.skipWhitespace = True
627        self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
628        self.copyDefaultWhiteChars = True
629        self.mayReturnEmpty = False  # used when checking for left-recursion
630        self.keepTabs = False
631        self.ignoreExprs = list()
632        self.debug = False
633        self.streamlined = False
634        self.mayIndexError = True  # used to optimize exception handling for subclasses that don't advance parse index
635        self.errmsg = ""
636        self.modalResults = True  # used to mark results names as modal (report only last) or cumulative (list all)
637        self.debugActions = (None, None, None)  # custom debug actions
638        self.re = None
639        self.callPreparse = True  # used to avoid redundant calls to preParse
640        self.callDuringTry = False
641
642    def copy(self):
643        """Make a copy of this ParserElement.  Useful for defining different parse actions
644           for the same parsing pattern, using copies of the original parse element."""
645        cpy = copy.copy(self)
646        cpy.parseAction = self.parseAction[:]
647        cpy.ignoreExprs = self.ignoreExprs[:]
648        if self.copyDefaultWhiteChars:
649            cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
650        return cpy
651
652    def setName(self, name):
653        """Define name for this expression, for use in debugging."""
654        self.name = name
655        self.errmsg = "Expected " + self.name
656        if hasattr(self, "exception"):
657            self.exception.msg = self.errmsg
658        return self
659
660    def setResultsName(self, name, listAllMatches=False):
661        """Define name for referencing matching tokens as a nested attribute
662           of the returned parse results.
663           NOTE: this returns a *copy* of the original ParserElement object;
664           this is so that the client can define a basic element, such as an
665           integer, and reference it in multiple places with different names.
666        """
667        newself = self.copy()
668        newself.resultsName = name
669        newself.modalResults = not listAllMatches
670        return newself
671
672    def setBreak(self, breakFlag=True):
673        """Method to invoke the Python pdb debugger when this element is
674           about to be parsed. Set breakFlag to True to enable, False to
675           disable.
676        """
677        if breakFlag:
678            _parseMethod = self._parse
679
680            def breaker(instring, loc, doActions=True, callPreParse=True):
681                import pdb
682                pdb.set_trace()
683                _parseMethod(instring, loc, doActions, callPreParse)
684            breaker._originalParseMethod = _parseMethod
685            self._parse = breaker
686        else:
687            if hasattr(self._parse, "_originalParseMethod"):
688                self._parse = self._parse._originalParseMethod
689        return self
690
691    def _normalizeParseActionArgs(f):
692        """Internal method used to decorate parse actions that take fewer than 3 arguments,
693           so that all parse actions can be called as f(s,l,t)."""
694        STAR_ARGS = 4
695
696        try:
697            restore = None
698            if isinstance(f, type):
699                restore = f
700                f = f.__init__
701            codeObj = f.code
702            if codeObj.co_flags & STAR_ARGS:
703                return f
704            numargs = codeObj.co_argcount
705            if hasattr(f, "__self__"):
706                numargs -= 1
707            if restore:
708                f = restore
709        except AttributeError:
710            try:
711                call_im_func_code = f.__code__
712
713                # not a function, must be a callable object, get info from the
714                # im_func binding of its bound __call__ method
715                if call_im_func_code.co_flags & STAR_ARGS:
716                    return f
717                numargs = call_im_func_code.co_argcount
718                if hasattr(f.__call__, "__self__"):
719                    numargs -= 0
720            except AttributeError:
721                call_func_code = f.__call__.__code__
722                # not a bound method, get info directly from __call__ method
723                if call_func_code.co_flags & STAR_ARGS:
724                    return f
725                numargs = call_func_code.co_argcount
726                if hasattr(f.__call__, "__self__"):
727                    numargs -= 1
728
729        # ~ print ("adding function %s with %d args" % (f.func_name,numargs))
730        if numargs == 3:
731            return f
732        else:
733            if numargs > 3:
734                def tmp(s, l, t):
735                    return f(f.__call__.__self__, s, l, t)
736            elif numargs == 2:
737                def tmp(s, l, t):
738                    return f(l, t)
739            elif numargs == 1:
740                def tmp(s, l, t):
741                    return f(t)
742            else:  # ~ numargs == 0:
743                def tmp(s, l, t):
744                    return f()
745            try:
746                tmp.__name__ = f.__name__
747            except (AttributeError, TypeError):
748                # no need for special handling if attribute doesnt exist
749                pass
750            try:
751                tmp.__doc__ = f.__doc__
752            except (AttributeError, TypeError):
753                # no need for special handling if attribute doesnt exist
754                pass
755            try:
756                tmp.__dict__.update(f.__dict__)
757            except (AttributeError, TypeError):
758                # no need for special handling if attribute doesnt exist
759                pass
760            return tmp
761    _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
762
763    def setParseAction(self, *fns, **kwargs):
764        """Define action to perform when successfully matching parse element definition.
765           Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
766           fn(loc,toks), fn(toks), or just fn(), where:
767            - s   = the original string being parsed (see note below)
768            - loc = the location of the matching substring
769            - toks = a list of the matched tokens, packaged as a ParseResults object
770           If the functions in fns modify the tokens, they can return them as the return
771           value from fn, and the modified list of tokens will replace the original.
772           Otherwise, fn does not need to return any value.
773
774           Note: the default parsing behavior is to expand tabs in the input string
775           before starting the parsing process.  See L{I{parseString}<parseString>} for more information
776           on parsing strings containing <TAB>s, and suggested methods to maintain a
777           consistent view of the parsed string, the parse location, and line and column
778           positions within the parsed string.
779           """
780        self.parseAction = list(map(self._normalizeParseActionArgs, list(fns)))
781        self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
782        return self
783
784    def addParseAction(self, *fns, **kwargs):
785        """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
786        self.parseAction += list(map(self._normalizeParseActionArgs, list(fns)))
787        self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
788        return self
789
790    def setFailAction(self, fn):
791        """Define action to perform if parsing fails at this expression.
792           Fail acton fn is a callable function that takes the arguments
793           fn(s,loc,expr,err) where:
794            - s = string being parsed
795            - loc = location where expression match was attempted and failed
796            - expr = the parse expression that failed
797            - err = the exception thrown
798           The function returns no value.  It may throw ParseFatalException
799           if it is desired to stop parsing immediately."""
800        self.failAction = fn
801        return self
802
803    def _skipIgnorables(self, instring, loc):
804        exprsFound = True
805        while exprsFound:
806            exprsFound = False
807            for e in self.ignoreExprs:
808                try:
809                    while True:
810                        loc, dummy = e._parse(instring, loc)
811                        exprsFound = True
812                except ParseException:
813                    pass
814        return loc
815
816    def preParse(self, instring, loc):
817        if self.ignoreExprs:
818            loc = self._skipIgnorables(instring, loc)
819
820        if self.skipWhitespace:
821            wt = self.whiteChars
822            instrlen = len(instring)
823            while loc < instrlen and instring[loc] in wt:
824                loc += 1
825
826        return loc
827
828    def parseImpl(self, instring, loc, doActions=True):
829        return loc, []
830
831    def postParse(self, instring, loc, tokenlist):
832        return tokenlist
833
834    # ~ @profile
835    def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
836        debugging = (self.debug)  # and doActions )
837
838        if debugging or self.failAction:
839            # ~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
840            if self.debugActions[0]:
841                self.debugActions[0](instring, loc, self)
842            if callPreParse and self.callPreparse:
843                preloc = self.preParse(instring, loc)
844            else:
845                preloc = loc
846            tokensStart = loc
847            try:
848                try:
849                    loc, tokens = self.parseImpl(instring, preloc, doActions)
850                except IndexError:
851                    raise ParseException(instring, len(instring), self.errmsg, self)
852            except ParseBaseException as err:
853                # ~ print ("Exception raised:", err)
854                if self.debugActions[2]:
855                    self.debugActions[2](instring, tokensStart, self, err)
856                if self.failAction:
857                    self.failAction(instring, tokensStart, self, err)
858                raise
859        else:
860            if callPreParse and self.callPreparse:
861                preloc = self.preParse(instring, loc)
862            else:
863                preloc = loc
864            tokensStart = loc
865            if self.mayIndexError or loc >= len(instring):
866                try:
867                    loc, tokens = self.parseImpl(instring, preloc, doActions)
868                except IndexError:
869                    raise ParseException(instring, len(instring), self.errmsg, self)
870            else:
871                loc, tokens = self.parseImpl(instring, preloc, doActions)
872
873        tokens = self.postParse(instring, loc, tokens)
874
875        retTokens = ParseResults(tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults)
876        if self.parseAction and (doActions or self.callDuringTry):
877            if debugging:
878                try:
879                    for fn in self.parseAction:
880                        tokens = fn(instring, tokensStart, retTokens)
881                        if tokens is not None:
882                            retTokens = ParseResults(
883                                tokens,
884                                self.resultsName,
885                                asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
886                                modal=self.modalResults)
887                except ParseBaseException as err:
888                    # ~ print "Exception raised in user parse action:", err
889                    if self.debugActions[2]:
890                        self.debugActions[2](instring, tokensStart, self, err)
891                    raise
892            else:
893                for fn in self.parseAction:
894                    tokens = fn(instring, tokensStart, retTokens)
895                    if tokens is not None:
896                        retTokens = ParseResults(
897                            tokens,
898                            self.resultsName,
899                            asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
900                            modal=self.modalResults)
901
902        if debugging:
903            # ~ print ("Matched",self,"->",retTokens.asList())
904            if self.debugActions[1]:
905                self.debugActions[1](instring, tokensStart, loc, self, retTokens)
906
907        return loc, retTokens
908
909    def tryParse(self, instring, loc):
910        try:
911            return self._parse(instring, loc, doActions=False)[0]
912        except ParseFatalException:
913            raise ParseException(instring, loc, self.errmsg, self)
914
915    # this method gets repeatedly called during backtracking with the same arguments -
916    # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
917    def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
918        lookup = (self, instring, loc, callPreParse, doActions)
919        if lookup in ParserElement._exprArgCache:
920            value = ParserElement._exprArgCache[lookup]
921            if isinstance(value, Exception):
922                raise value
923            return value
924        else:
925            try:
926                value = self._parseNoCache(instring, loc, doActions, callPreParse)
927                ParserElement._exprArgCache[lookup] = (value[0], value[1].copy())
928                return value
929            except ParseBaseException as pe:
930                ParserElement._exprArgCache[lookup] = pe
931                raise
932
    # default parse entry point is the uncached implementation;
    # enablePackrat() rebinds ParserElement._parse to _parseCache
    _parse = _parseNoCache

    # argument cache for optimizing repeated calls when backtracking through recursive expressions;
    # shared by all ParserElement instances and emptied by resetCache()
    _exprArgCache = {}
937
938    def resetCache():
939        ParserElement._exprArgCache.clear()
940    resetCache = staticmethod(resetCache)
941
    # set to True by enablePackrat() the first time it is called
    _packratEnabled = False
943
944    def enablePackrat():
945        """Enables "packrat" parsing, which adds memoizing to the parsing logic.
946           Repeated parse attempts at the same string location (which happens
947           often in many complex grammars) can immediately return a cached value,
948           instead of re-executing parsing/validating code.  Memoizing is done of
949           both valid results and parsing exceptions.
950
951           This speedup may break existing programs that use parse actions that
952           have side-effects.  For this reason, packrat parsing is disabled when
953           you first import pyparsing.  To activate the packrat feature, your
954           program must call the class method ParserElement.enablePackrat().  If
955           your program uses psyco to "compile as you go", you must call
956           enablePackrat before calling psyco.full().  If you do not do this,
957           Python will crash.  For best results, call enablePackrat() immediately
958           after importing pyparsing.
959        """
960        if not ParserElement._packratEnabled:
961            ParserElement._packratEnabled = True
962            ParserElement._parse = ParserElement._parseCache
963    enablePackrat = staticmethod(enablePackrat)
964
965    def parseString(self, instring, parseAll=False):
966        """Execute the parse expression with the given string.
967           This is the main interface to the client code, once the complete
968           expression has been built.
969
970           If you want the grammar to require that the entire input string be
971           successfully parsed, then set parseAll to True (equivalent to ending
972           the grammar with StringEnd()).
973
974           Note: parseString implicitly calls expandtabs() on the input string,
975           in order to report proper column numbers in parse actions.
976           If the input string contains tabs and
977           the grammar uses parse actions that use the loc argument to index into the
978           string being parsed, you can ensure you have a consistent view of the input
979           string by:
980            - calling parseWithTabs on your grammar before calling parseString
981              (see L{I{parseWithTabs}<parseWithTabs>})
982            - define your parse action using the full (s,loc,toks) signature, and
983              reference the input string using the parse action's s argument
984            - explictly expand the tabs in your input string before calling
985              parseString
986        """
987        ParserElement.resetCache()
988        if not self.streamlined:
989            self.streamline()
990            # ~ self.saveAsList = True
991        for e in self.ignoreExprs:
992            e.streamline()
993        if not self.keepTabs:
994            instring = instring.expandtabs()
995        loc, tokens = self._parse(instring, 0)
996        if parseAll:
997            StringEnd()._parse(instring, loc)
998        return tokens
999
1000    def scanString(self, instring, maxMatches=_MAX_INT):
1001        """Scan the input string for expression matches.  Each match will return the
1002           matching tokens, start location, and end location.  May be called with optional
1003           maxMatches argument, to clip scanning after 'n' matches are found.
1004
1005           Note that the start and end locations are reported relative to the string
1006           being parsed.  See L{I{parseString}<parseString>} for more information on parsing
1007           strings with embedded tabs."""
1008        if not self.streamlined:
1009            self.streamline()
1010        for e in self.ignoreExprs:
1011            e.streamline()
1012
1013        if not self.keepTabs:
1014            instring = str(instring).expandtabs()
1015        instrlen = len(instring)
1016        loc = 0
1017        preparseFn = self.preParse
1018        parseFn = self._parse
1019        ParserElement.resetCache()
1020        matches = 0
1021        while loc <= instrlen and matches < maxMatches:
1022            try:
1023                preloc = preparseFn(instring, loc)
1024                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
1025            except ParseException:
1026                loc = preloc+1
1027            else:
1028                matches += 1
1029                yield tokens, preloc, nextLoc
1030                loc = nextLoc
1031
1032    def transformString(self, instring):
1033        """Extension to scanString, to modify matching text with modified tokens that may
1034           be returned from a parse action.  To use transformString, define a grammar and
1035           attach a parse action to it that modifies the returned token list.
1036           Invoking transformString() on a target string will then scan for matches,
1037           and replace the matched text patterns according to the logic in the parse
1038           action.  transformString() returns the resulting transformed string."""
1039        out = []
1040        lastE = 0
1041        # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
1042        # keep string locs straight between transformString and scanString
1043        self.keepTabs = True
1044        for t, s, e in self.scanString(instring):
1045            out.append(instring[lastE:s])
1046            if t:
1047                if isinstance(t, ParseResults):
1048                    out += t.asList()
1049                elif isinstance(t, list):
1050                    out += t
1051                else:
1052                    out.append(t)
1053            lastE = e
1054        out.append(instring[lastE:])
1055        return "".join(map(str, out))
1056
1057    def searchString(self, instring, maxMatches=_MAX_INT):
1058        """Another extension to scanString, simplifying the access to the tokens found
1059           to match the given parse expression.  May be called with optional
1060           maxMatches argument, to clip searching after 'n' matches are found.
1061        """
1062        return ParseResults([t for t, s, e in self.scanString(instring, maxMatches)])
1063
1064    def __add__(self, other):
1065        """Implementation of + operator - returns And"""
1066        if isinstance(other, str):
1067            other = Literal(other)
1068        if not isinstance(other, ParserElement):
1069            warnings.warn(
1070                "Cannot combine element of type %s with ParserElement" % type(other),
1071                SyntaxWarning,
1072                stacklevel=2)
1073            return None
1074        return And([self, other])
1075
1076    def __radd__(self, other):
1077        """Implementation of + operator when left operand is not a ParserElement"""
1078        if isinstance(other, str):
1079            other = Literal(other)
1080        if not isinstance(other, ParserElement):
1081            warnings.warn(
1082                "Cannot combine element of type %s with ParserElement" % type(other),
1083                SyntaxWarning,
1084                stacklevel=2)
1085            return None
1086        return other + self
1087
1088    def __sub__(self, other):
1089        """Implementation of - operator, returns And with error stop"""
1090        if isinstance(other, str):
1091            other = Literal(other)
1092        if not isinstance(other, ParserElement):
1093            warnings.warn(
1094                "Cannot combine element of type %s with ParserElement" % type(other),
1095                SyntaxWarning,
1096                stacklevel=2)
1097            return None
1098        return And([self, And._ErrorStop(), other])
1099
1100    def __rsub__(self, other):
1101        """Implementation of - operator when left operand is not a ParserElement"""
1102        if isinstance(other, str):
1103            other = Literal(other)
1104        if not isinstance(other, ParserElement):
1105            warnings.warn(
1106                "Cannot combine element of type %s with ParserElement" % type(other),
1107                SyntaxWarning,
1108                stacklevel=2)
1109            return None
1110        return other - self
1111
1112    def __mul__(self, other):
1113        if isinstance(other, int):
1114            minElements, optElements = other, 0
1115        elif isinstance(other, tuple):
1116            if len(other) == 0:
1117                other = (None, None)
1118            elif len(other) == 1:
1119                other = (other[0], None)
1120            if len(other) == 2:
1121                if other[0] is None:
1122                    other = (0, other[1])
1123                if isinstance(other[0], int) and other[1] is None:
1124                    if other[0] == 0:
1125                        return ZeroOrMore(self)
1126                    if other[0] == 1:
1127                        return OneOrMore(self)
1128                    else:
1129                        return self*other[0] + ZeroOrMore(self)
1130                elif isinstance(other[0], int) and isinstance(other[1], int):
1131                    minElements, optElements = other
1132                    optElements -= minElements
1133                else:
1134                    raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]), type(other[1]))
1135            else:
1136                raise TypeError("can only multiply 'ParserElement' and int or (int,int) objects")
1137        else:
1138            raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1139
1140        if minElements < 0:
1141            raise ValueError("cannot multiply ParserElement by negative value")
1142        if optElements < 0:
1143            raise ValueError("second tuple value must be greater or equal to first tuple value")
1144        if minElements == optElements == 0:
1145            raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1146
1147        if optElements:
1148            def makeOptionalList(n):
1149                if n > 1:
1150                    return Optional(self + makeOptionalList(n-1))
1151                else:
1152                    return Optional(self)
1153            if minElements:
1154                if minElements == 1:
1155                    ret = self + makeOptionalList(optElements)
1156                else:
1157                    ret = And([self]*minElements) + makeOptionalList(optElements)
1158            else:
1159                ret = makeOptionalList(optElements)
1160        else:
1161            if minElements == 1:
1162                ret = self
1163            else:
1164                ret = And([self]*minElements)
1165        return ret
1166
1167    def __rmul__(self, other):
1168        return self.__mul__(other)
1169
1170    def __or__(self, other):
1171        """Implementation of | operator - returns MatchFirst"""
1172        if isinstance(other, str):
1173            other = Literal(other)
1174        if not isinstance(other, ParserElement):
1175            warnings.warn(
1176                "Cannot combine element of type %s with ParserElement" % type(other),
1177                SyntaxWarning,
1178                stacklevel=2)
1179            return None
1180        return MatchFirst([self, other])
1181
1182    def __ror__(self, other):
1183        """Implementation of | operator when left operand is not a ParserElement"""
1184        if isinstance(other, str):
1185            other = Literal(other)
1186        if not isinstance(other, ParserElement):
1187            warnings.warn(
1188                "Cannot combine element of type %s with ParserElement" % type(other),
1189                SyntaxWarning,
1190                stacklevel=2)
1191            return None
1192        return other | self
1193
1194    def __xor__(self, other):
1195        """Implementation of ^ operator - returns Or"""
1196        if isinstance(other, str):
1197            other = Literal(other)
1198        if not isinstance(other, ParserElement):
1199            warnings.warn(
1200                "Cannot combine element of type %s with ParserElement" % type(other),
1201                SyntaxWarning,
1202                stacklevel=2)
1203            return None
1204        return Or([self, other])
1205
1206    def __rxor__(self, other):
1207        """Implementation of ^ operator when left operand is not a ParserElement"""
1208        if isinstance(other, str):
1209            other = Literal(other)
1210        if not isinstance(other, ParserElement):
1211            warnings.warn(
1212                "Cannot combine element of type %s with ParserElement" % type(other),
1213                SyntaxWarning,
1214                stacklevel=2)
1215            return None
1216        return other ^ self
1217
1218    def __and__(self, other):
1219        """Implementation of & operator - returns Each"""
1220        if isinstance(other, str):
1221            other = Literal(other)
1222        if not isinstance(other, ParserElement):
1223            warnings.warn(
1224                "Cannot combine element of type %s with ParserElement" % type(other),
1225                SyntaxWarning,
1226                stacklevel=2)
1227            return None
1228        return Each([self, other])
1229
1230    def __rand__(self, other):
1231        """Implementation of & operator when left operand is not a ParserElement"""
1232        if isinstance(other, str):
1233            other = Literal(other)
1234        if not isinstance(other, ParserElement):
1235            warnings.warn(
1236                "Cannot combine element of type %s with ParserElement" % type(other),
1237                SyntaxWarning,
1238                stacklevel=2)
1239            return None
1240        return other & self
1241
1242    def __invert__(self):
1243        """Implementation of ~ operator - returns NotAny"""
1244        return NotAny(self)
1245
1246    def __call__(self, name):
1247        """Shortcut for setResultsName, with listAllMatches=default::
1248             userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1249           could be written as::
1250             userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1251           """
1252        return self.setResultsName(name)
1253
1254    def suppress(self):
1255        """Suppresses the output of this ParserElement; useful to keep punctuation from
1256           cluttering up returned output.
1257        """
1258        return Suppress(self)
1259
1260    def leaveWhitespace(self):
1261        """Disables the skipping of whitespace before matching the characters in the
1262           ParserElement's defined pattern.  This is normally only used internally by
1263           the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1264        """
1265        self.skipWhitespace = False
1266        return self
1267
1268    def setWhitespaceChars(self, chars):
1269        """Overrides the default whitespace chars
1270        """
1271        self.skipWhitespace = True
1272        self.whiteChars = chars
1273        self.copyDefaultWhiteChars = False
1274        return self
1275
1276    def parseWithTabs(self):
1277        """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
1278           Must be called before parseString when the input grammar contains elements that
1279           match <TAB> characters."""
1280        self.keepTabs = True
1281        return self
1282
1283    def ignore(self, other):
1284        """Define expression to be ignored (e.g., comments) while doing pattern
1285           matching; may be called repeatedly, to define multiple comment or other
1286           ignorable patterns.
1287        """
1288        if isinstance(other, Suppress):
1289            if other not in self.ignoreExprs:
1290                self.ignoreExprs.append(other)
1291        else:
1292            self.ignoreExprs.append(Suppress(other))
1293        return self
1294
1295    def setDebugActions(self, startAction, successAction, exceptionAction):
1296        """Enable display of debugging messages while doing pattern matching."""
1297        self.debugActions = (startAction or _defaultStartDebugAction,
1298                             successAction or _defaultSuccessDebugAction,
1299                             exceptionAction or _defaultExceptionDebugAction)
1300        self.debug = True
1301        return self
1302
1303    def setDebug(self, flag=True):
1304        """Enable display of debugging messages while doing pattern matching.
1305           Set flag to True to enable, False to disable."""
1306        if flag:
1307            self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction)
1308        else:
1309            self.debug = False
1310        return self
1311
1312    def __str__(self):
1313        return self.name
1314
1315    def __repr__(self):
1316        return str(self)
1317
1318    def streamline(self):
1319        self.streamlined = True
1320        self.strRepr = None
1321        return self
1322
1323    def checkRecursion(self, parseElementList):
1324        pass
1325
1326    def validate(self, validateTrace=None):
1327        """Check defined expressions for valid structure, check for infinite recursive definitions."""
1328        self.checkRecursion([])
1329
1330    def parseFile(self, file_or_filename):
1331        """Execute the parse expression on the given file or filename.
1332           If a filename is specified (instead of a file object),
1333           the entire file is opened, read, and closed before parsing.
1334        """
1335        try:
1336            file_contents = file_or_filename.read()
1337        except AttributeError:
1338            f = open(file_or_filename, "rb")
1339            file_contents = f.read()
1340            f.close()
1341        return self.parseString(file_contents)
1342
1343    def getException(self):
1344        return ParseException("", 0, self.errmsg, self)
1345
1346    def __getattr__(self, aname):
1347        if aname == "myException":
1348            self.myException = ret = self.getException()
1349            return ret
1350        else:
1351            raise AttributeError("no such attribute " + aname)
1352
1353    def __eq__(self, other):
1354        if isinstance(other, str):
1355            try:
1356                (self + StringEnd()).parseString(str(other))
1357                return True
1358            except ParseBaseException:
1359                return False
1360        else:
1361            return super() == other
1362
1363    def __hash__(self):
1364        return hash(id(self))
1365
1366    def __req__(self, other):
1367        return self == other
1368
1369
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""

    def __init__(self):
        # initialize the base class with savelist=False
        super().__init__(savelist=False)

    def setName(self, name):
        """Set the name through the base class, then rebuild errmsg as
           "Expected <name>".  Returns whatever the base setName returns."""
        renamed = super().setName(name)
        self.errmsg = "Expected " + self.name
        return renamed
1380
1381
class Empty(Token):
    """An empty token, will always match."""

    def __init__(self):
        super().__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
1390
1391
class NoMatch(Token):
    """A token that will never match."""

    def __init__(self):
        super().__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "NoMatch"

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail: raise this expression's exception, updated with the
           current location and input string."""
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure
1407
1408
class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__(self, matchString):
        super().__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string is reported and the instance is turned into an Empty
            warnings.warn(
                "null string passed to Literal; use Empty() instead",
                SyntaxWarning,
                stacklevel=2)
            self.__class__ = Empty
        quoted = '"%s"' % str(self.match)
        self.name = quoted
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*.
    # Comparing the first character before anything else rejects most
    # non-matches cheaply, and a single-character match string never needs
    # the startswith call at all.
    def parseImpl(self, instring, loc, doActions=True):
        """Match the literal at loc.  Returns (end location, matched string) on
           success; otherwise raises this expression's exception, updated with
           loc and instring."""
        matched = instring[loc] == self.firstMatchChar and (
            self.matchLen == 1 or instring.startswith(self.match, loc))
        if not matched:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc+self.matchLen, self.match
1442
1443
# short module-level alias for the Literal class
_L = Literal
1445
1446
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
       matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False):
        super().__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn(
                "null string passed to Keyword; use Empty() instead",
                SyntaxWarning,
                stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless matching compares upper-cased text throughout
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at loc, requiring that the characters immediately
           before and after it (if any) are not identifier characters.
           Returns (end location, keyword string) on success; otherwise raises
           this expression's exception, updated with loc and instring."""
        end = loc + self.matchLen
        atEnd = loc >= len(instring)-self.matchLen
        if self.caseless:
            found = ((instring[loc:end].upper() == self.caselessmatch)
                     and (atEnd or instring[end].upper() not in self.identChars)
                     and (loc == 0 or instring[loc-1].upper() not in self.identChars))
        else:
            found = (instring[loc] == self.firstMatchChar
                     and (self.matchLen == 1 or instring.startswith(self.match, loc))
                     and (atEnd or instring[end] not in self.identChars)
                     and (loc == 0 or instring[loc-1] not in self.identChars))
        if found:
            return end, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def copy(self):
        """Copy via the base class, then set the copy's identChars to
           Keyword.DEFAULT_KEYWORD_CHARS."""
        duplicate = super().copy()
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
1508
1509
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """

    def __init__(self, matchString):
        # the base Literal stores the upper-cased form as self.match
        super().__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the upper-cased input slice with the stored upper-cased match
           string.  Returns (end location, original match string) on success;
           otherwise raises this expression's exception, updated with loc and
           instring."""
        end = loc+self.matchLen
        if instring[loc:end].upper() != self.match:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return end, self.returnString
1531
1532
class CaselessKeyword(Keyword):
    """Keyword that is matched without regard to letter case.

       Like Keyword, the match must not be immediately preceded or followed
       by one of the identifier characters in identChars, so it cannot
       match as a fragment of a longer identifier.
    """

    def __init__(self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS):
        super().__init__(matchString, identChars, caseless=True)

    def parseImpl(self, instring, loc, doActions=True):
        """Return (new location, self.match) on success.

        Raises self.myException (with loc and pstr updated) when the text at
        loc is not the keyword or is embedded in a longer identifier.
        """
        end = loc + self.matchLen
        if (instring[loc:end].upper() == self.caselessmatch
                # character after the match must not continue an identifier
                and (loc >= len(instring)-self.matchLen or instring[end].upper() not in self.identChars)
                # character before the match must not be part of an identifier;
                # same test as the caseless branch of Keyword.parseImpl
                and (loc == 0 or instring[loc-1].upper() not in self.identChars)):
            return end, self.match
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
1546
1547
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.  If asKeyword
       is true, the match must additionally sit on word boundaries (regex
       \\b on the regex fast path; otherwise the characters on either side
       of the match must not be body characters).
    """

    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False):
        super().__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars:
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            # no separate body set given: body characters are the initial ones
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = str(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # Fast path: with no length restrictions and no blank in either
        # character set, the whole word can be matched by one compiled regex.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min == 1 and max == 0 and exact == 0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # A single initial character can be emitted as an escaped
                # literal.  This must test initCharsOrig (not bodyCharsOrig):
                # a multi-character initial set escaped as a literal would
                # require the whole set to appear as a sequence.
                self.reString = "{}[{}]*".format(
                    re.escape(self.initCharsOrig),
                    _escapeRegexRangeChars(self.bodyCharsOrig))
            else:
                self.reString = "[{}][{}]*".format(
                    _escapeRegexRangeChars(self.initCharsOrig),
                    _escapeRegexRangeChars(self.bodyCharsOrig))
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile(self.reString)
            except Exception:
                # fall back to the character-by-character scan in parseImpl
                self.re = None

    def parseImpl(self, instring, loc, doActions=True):
        """Return (new location, matched word) or raise self.myException."""
        # NOTE(review): when the fast path above is skipped, self.re is
        # presumably still the value set by the base class - confirm.
        if self.re:
            result = self.re.match(instring, loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc, result.group()

        if instring[loc] not in self.initChars:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        # never scan past maxLen characters or the end of the input
        maxloc = start + self.maxLen
        maxloc = min(maxloc, instrlen)
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, a word that keeps going past max is rejected
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            if (start > 0 and instring[start-1] in bodychars) or (loc < instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__(self):
        # Prefer the parent's string form; if it fails, build (and cache in
        # strRepr) an abbreviated description of the character sets.
        try:
            return super().__str__()
        except Exception:
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # show at most the first four characters of a set
                if len(s) > 4:
                    return s[:4]+"..."
                else:
                    return s

            if self.initCharsOrig != self.bodyCharsOrig:
                self.strRepr = f"W:({charsAsStr(self.initCharsOrig)},{charsAsStr(self.bodyCharsOrig)})"
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1673
1674
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """

    def __init__(self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.

        An empty pattern triggers a SyntaxWarning.  A pattern that fails to
        compile triggers a SyntaxWarning and the re.error is re-raised.
        """
        super().__init__()

        if not pattern:
            warnings.warn(
                "null string passed to Regex; use Empty() instead",
                SyntaxWarning,
                stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except re.error:
            # re.error is the same exception class that sre_constants.error
            # names; using it avoids the deprecated sre_constants module.
            warnings.warn(
                "invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning,
                stacklevel=2)
            raise

        self.name = str(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the compiled pattern at loc.

        Returns (end of match, ParseResults); every named group of the
        pattern is also stored in the results under its group name.
        Raises self.myException when the pattern does not match at loc.
        """
        result = self.re.match(instring, loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = ParseResults(result.group())
        for groupName, groupValue in result.groupdict().items():
            ret[groupName] = groupValue
        return loc, ret

    def __str__(self):
        # Prefer the parent's string form; otherwise describe the pattern.
        try:
            return super().__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1734
1735
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """

    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

           Raises SyntaxError (after a SyntaxWarning) if quoteChar or
           endQuoteChar is empty once surrounding whitespace is stripped,
           and re-raises re.error if the generated pattern does not compile.
        """
        super().__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # Characters excluded from the "ordinary body character" class: the
        # first end-quote character and, if given, the escape character.
        endQuoteFirst = _escapeRegexRangeChars(self.endQuoteChar[0])
        escRange = _escapeRegexRangeChars(escChar) if escChar else ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'{}(?:[^{}{}]'.format(
                re.escape(self.quoteChar), endQuoteFirst, escRange)
        else:
            # single-line strings additionally stop at CR/LF
            self.flags = 0
            self.pattern = r'{}(?:[^{}\n\r{}]'.format(
                re.escape(self.quoteChar), endQuoteFirst, escRange)
        if len(self.endQuoteChar) > 1:
            # For a multi-character end quote, allow any proper prefix of it
            # that is followed by a character that breaks the end quote.
            self.pattern += (
                '|(?:' + ')|(?:'.join(["{}[^{}]".format(
                    re.escape(self.endQuoteChar[:i]),
                    _escapeRegexRangeChars(self.endQuoteChar[i])
                ) for i in range(len(self.endQuoteChar)-1, 0, -1)]) + ')')
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except re.error:
            # re.error is the same exception class that sre_constants.error
            # names; using it avoids the deprecated sre_constants module.
            warnings.warn(
                "invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning,
                stacklevel=2)
            raise

        self.name = str(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at loc.

        Returns (end of match, text).  With unquoteResults the delimiters
        are removed, escChar-escaped characters are replaced by the escaped
        character, and escQuote sequences are replaced by endQuoteChar.
        Raises self.myException when no quoted string starts at loc.
        """
        # cheap first-character test before running the regex
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring, loc)
        else:
            result = None
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            # replace escaped characters
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

            # replace escaped quotes
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__(self):
        # Prefer the parent's string form; otherwise describe the delimiters.
        try:
            return super().__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = f"quoted string, starting with {self.quoteChar} ending with {self.endQuoteChar}"

        return self.strRepr
1852
1853
class CharsNotIn(Token):
    """Token matching a run of characters that are all *outside* a given set.
       Constructed from a string of the disallowed characters plus optional
       min, max and exact lengths.  min defaults to 1 and may not be less
       than 1; max and exact default to 0, which means "no limit" / "no
       exact length".  A positive exact overrides both min and max.
    """

    def __init__(self, notChars, min=1, max=0, exact=0):
        super().__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        if exact > 0:
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.name = str(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Return (new location, matched text) or raise self.myException."""
        forbidden = self.notChars
        if instring[loc] in forbidden:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        start = loc
        loc += 1
        # stop at maxLen characters or the end of the input, whichever is first
        stop = min(start + self.maxLen, len(instring))
        while loc < stop and instring[loc] not in forbidden:
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]

    def __str__(self):
        # Use the parent's string form when it works; otherwise cache a
        # description showing at most the first four disallowed characters.
        try:
            return super().__str__()
        except Exception:
            pass

        if self.strRepr is None:
            shown = self.notChars
            if len(shown) > 4:
                self.strRepr = "!W:(%s...)" % shown[:4]
            else:
                self.strRepr = "!W:(%s)" % shown

        return self.strRepr
1923
1924
class White(Token):
    """Token that matches whitespace explicitly, for grammars in which some
       whitespace is significant.  Constructed from a string of the
       whitespace characters to match (default " \\t\\r\\n"), plus optional
       min, max and exact lengths; max and exact of 0 mean no limit, and a
       positive exact overrides min and max."""
    # display names used to build the token's name
    whiteStrs = {
        " ": "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
    }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super().__init__()
        self.matchWhite = ws
        # The characters being matched must not be skipped as leading
        # whitespace, so drop them from this element's whitespace set.
        self.setWhitespaceChars("".join(c for c in self.whiteChars if c not in self.matchWhite))
        self.name = "".join(White.whiteStrs[c] for c in self.matchWhite)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        if exact > 0:
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def parseImpl(self, instring, loc, doActions=True):
        """Return (new location, matched whitespace) or raise self.myException."""
        wanted = self.matchWhite
        if instring[loc] not in wanted:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        start = loc
        loc += 1
        # stop at maxLen characters or the end of the input, whichever is first
        stop = min(start + self.maxLen, len(instring))
        while loc < stop and instring[loc] in wanted:
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]
1979
1980
class _PositionToken(Token):
    """Base class for the position-testing tokens defined after it.

       Sets the element name to the concrete class name and flags the
       element as one that may return empty and does not raise IndexError.
    """

    def __init__(self):
        super().__init__()
        self.name = type(self).__name__
        self.mayReturnEmpty = True
        self.mayIndexError = False
1987
1988
class GoToColumn(_PositionToken):
    """Token that advances to a given column of the input text; handy when scraping tabular reports."""

    def __init__(self, colno):
        super().__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorables and whitespace until the target column is reached."""
        if col(loc, instring) == self.col:
            return loc
        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while loc < instrlen and instring[loc].isspace() and col(loc, instring) != self.col:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return (location at the target column, text skipped to get there).

        Raises ParseException when loc is already past the target column.
        """
        currentCol = col(loc, instring)
        if currentCol > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target = loc + self.col - currentCol
        return target, instring[loc: target]
2012
2013
class LineStart(_PositionToken):
    """Matches when the current position is at the start of a line of the parse string"""

    def __init__(self):
        super().__init__()
        # only blanks and tabs are skippable, so newlines stay visible
        self.setWhitespaceChars(" \t")
        self.errmsg = "Expected start of line"

    def preParse(self, instring, loc):
        # Run the inherited pre-parse only to see where it would stop: if
        # that is on a newline, return loc advanced by one, else loc as given.
        skipped = super().preParse(instring, loc)
        return loc + 1 if instring[skipped] == "\n" else loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return (loc, []) at a line start, else raise self.myException.

        A line start is position 0, the position pre-parsing reaches from
        position 0, or any position directly after a newline.
        """
        if (loc != 0
                and loc != self.preParse(instring, 0)
                and instring[loc-1] != "\n"):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
2037
2038
class LineEnd(_PositionToken):
    """Matches when the current position is at the end of a line of the parse string"""

    def __init__(self):
        super().__init__()
        # only blanks and tabs are skippable, so newlines stay visible
        self.setWhitespaceChars(" \t")
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a newline at loc, or match at the very end of the input.

        Returns (loc+1, "\\n") for a newline and (loc+1, []) when loc equals
        the input length; raises self.myException in every other case.
        """
        instrlen = len(instring)
        if loc < instrlen and instring[loc] == "\n":
            return loc+1, "\n"
        if loc == instrlen:
            return loc+1, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2063
2064
class StringStart(_PositionToken):
    """Matches when the current position is at the start of the parse string"""

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Return (loc, []) at the start of the text, else raise self.myException."""
        # A non-zero loc still counts as the start when it is exactly where
        # pre-parsing from position 0 ends up (only whitespace and
        # ignorables precede it).
        if loc != 0 and loc != self.preParse(instring, 0):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
2081
2082
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""

    def __init__(self):
        super().__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Match only when nothing of the input remains.

        Returns (loc+1, []) when loc is exactly the input length and
        (loc, []) when loc is already beyond it; raises self.myException
        while input remains at loc.
        """
        instrlen = len(instring)
        if loc < instrlen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        if loc == instrlen:
            return loc+1, []
        # loc > instrlen is the only case left; the old trailing
        # raise branch could never be reached.
        return loc, []
2105
2106
class WordStart(_PositionToken):
    r"""Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of wordChars
       (default=printables). To emulate the \b behavior of regular expressions,
       use WordStart(alphanums). WordStart will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """
    # The docstring is a raw string so that "\b" reads as backslash-b, not
    # as a backspace character.

    def __init__(self, wordChars=printables):
        super().__init__()
        self.wordChars = _str2dict(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Return (loc, []) at a word start, else raise self.myException.

        Position 0 always matches.  Elsewhere the previous character must
        not be a word character and the current one must be.
        """
        if loc != 0:
            if (instring[loc-1] in self.wordChars
                    or instring[loc] not in self.wordChars):
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
2129
2130
class WordEnd(_PositionToken):
    r"""Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of wordChars
       (default=printables). To emulate the \b behavior of regular expressions,
       use WordEnd(alphanums). WordEnd will also match at the end of
       the string being parsed, or at the end of a line.
    """
    # The docstring is a raw string so that "\b" reads as backslash-b, not
    # as a backspace character.

    def __init__(self, wordChars=printables):
        super().__init__()
        self.wordChars = _str2dict(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Return (loc, []) at a word end, else raise self.myException.

        Any position at or past the end of the input matches.  Elsewhere
        the current character must not be a word character and the previous
        character must exist and be a word character.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (instring[loc] in self.wordChars
                    # At loc 0 there is no previous character; without this
                    # test instring[loc-1] would wrap around to the LAST
                    # character of the input and could match falsely.
                    or loc == 0
                    or instring[loc-1] not in self.wordChars):
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
2156
2157
class ParseExpression(ParserElement):
    """Abstract ParserElement subclass that holds a list of sub-expressions, for combining and post-processing parsed tokens."""

    def __init__(self, exprs, savelist=False):
        super().__init__(savelist)
        # Normalise the argument to a list: a string becomes a one-element
        # list holding a Literal, any other non-list is wrapped as-is, and a
        # list is stored without copying.
        if isinstance(exprs, str):
            exprs = [Literal(exprs)]
        elif not isinstance(exprs, list):
            exprs = [exprs]
        self.exprs = exprs
        self.callPreparse = False

    def __getitem__(self, i):
        return self.exprs[i]

    def append(self, other):
        """Add other to the contained expressions and return self."""
        self.exprs.append(other)
        # the cached string form no longer describes the contents
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Turn off whitespace skipping for this element and for copies of
           all contained expressions, which replace the originals."""
        self.skipWhitespace = False
        self.exprs = [sub.copy() for sub in self.exprs]
        for sub in self.exprs:
            sub.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register other as ignorable here and on every contained expression.

        A Suppress that is already in ignoreExprs is not registered again.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super().ignore(other)
        for sub in self.exprs:
            # pass on the entry the parent class just stored
            sub.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        # Use the parent's string form when it works; otherwise cache one
        # built from the class name and the contained expressions.
        try:
            return super().__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, str(self.exprs))
        return self.strRepr

    def streamline(self):
        """Streamline this element and its children, flattening nested
           two-element expressions of the same class."""
        super().streamline()

        for sub in self.exprs:
            sub.streamline()

        def mergeable(candidate):
            # only an undecorated expression of our own class may be absorbed
            return (isinstance(candidate, self.__class__)
                    and not candidate.parseAction
                    and candidate.resultsName is None
                    and not candidate.debug)

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            head = self.exprs[0]
            if mergeable(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self.strRepr = None
                self.mayReturnEmpty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            tail = self.exprs[-1]
            if mergeable(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        return self

    def setResultsName(self, name, listAllMatches=False):
        # plain delegation to the parent implementation
        return super().setResultsName(name, listAllMatches)

    def validate(self, validateTrace=None):
        """Validate every contained expression, then check this one for recursion."""
        trace = ([] if validateTrace is None else validateTrace[:]) + [self]
        for sub in self.exprs:
            sub.validate(trace)
        self.checkRecursion([])
2253
2254
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """

    class _ErrorStop(Empty):
        # Every construction returns the one shared marker instance, so
        # parseImpl can recognise it with an identity test.
        def __new__(cls, *args, **kwargs):
            return And._ErrorStop.instance
    _ErrorStop.instance = Empty()
    _ErrorStop.instance.leaveWhitespace()

    def __init__(self, exprs, savelist=True):
        super().__init__(exprs, savelist)
        # an And can only return empty if every one of its parts can
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # Take the whitespace handling of the first sub-expression.  This
        # reads self.exprs (the list normalised by ParseExpression), not the
        # raw exprs argument, which may be a str or a single element; an
        # empty list leaves the inherited whitespace settings untouched.
        if self.exprs:
            self.setWhitespaceChars(self.exprs[0].whiteChars)
            self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse every sub-expression in order and return (loc, tokens).

        Once the _ErrorStop marker has been passed, a failure of a later
        sub-expression is raised as ParseSyntaxException.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        errorStop = False
        for e in self.exprs[1:]:
            if e is And._ErrorStop.instance:
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseBaseException as pe:
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException(ParseException(instring, len(instring), self.errmsg, self))
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            # keep results that carry tokens or named entries
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        if isinstance(other, str):
            other = Literal(other)
        return self.append(other)  # And( [ self, other ] )

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
            # sub-expressions after the first one that cannot return empty
            # are not checked
            if not e.mayReturnEmpty:
                break

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join([str(e) for e in self.exprs]) + "}"

        return self.strRepr
2320
2321
class Or(ParseExpression):
    """Requires at least one of the given ParseExpressions to match.
       When several match, the one that matches the longest string wins.
       May be constructed using the '^' operator.
    """

    def __init__(self, exprs, savelist=False):
        super().__init__(exprs, savelist)
        # an Or may return empty as soon as any alternative may
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative and parse with the one that reaches furthest.

        If none matches, raises the failure recorded at the furthest
        location, or a ParseException when no failure was recorded.
        """
        furthestFailLoc = -1
        furthestMatchLoc = -1
        furthestFailure = None
        for alternative in self.exprs:
            try:
                endLoc = alternative.tryParse(instring, loc)
            except ParseException as err:
                if err.loc > furthestFailLoc:
                    furthestFailure = err
                    furthestFailLoc = err.loc
            except IndexError:
                # treated as a failure located at the end of the input
                instrlen = len(instring)
                if instrlen > furthestFailLoc:
                    furthestFailure = ParseException(instring, instrlen, alternative.errmsg, self)
                    furthestFailLoc = instrlen
            else:
                if endLoc > furthestMatchLoc:
                    furthestMatchLoc = endLoc
                    bestAlternative = alternative

        if furthestMatchLoc >= 0:
            # re-parse with the winner, this time honouring doActions
            return bestAlternative._parse(instring, loc, doActions)
        if furthestFailure is not None:
            raise furthestFailure
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        if isinstance(other, str):
            other = Literal(other)
        return self.append(other)  # Or( [ self, other ] )

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ^ ".join(str(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for alternative in self.exprs:
            alternative.checkRecursion(subRecCheckList)
2382
2383
class MatchFirst(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the '|' operator.
    """

    def __init__(self, exprs, savelist=False):
        """Store the alternatives and derive mayReturnEmpty.

        With no alternatives given the expression is marked as possibly empty;
        otherwise it may return empty if any alternative may.
        """
        super().__init__(exprs, savelist)
        if not exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses at loc.

        If every alternative fails, the failure that got furthest is re-raised;
        with no recorded failure a generic ParseException is raised.
        """
        furthestErrLoc = -1
        furthestErr = None
        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthestErrLoc:
                    furthestErr, furthestErrLoc = err, err.loc
            except IndexError:
                # running off the end of the input counts as a failure at end-of-string
                if len(instring) > furthestErrLoc:
                    furthestErr = ParseException(instring, len(instring), alt.errmsg, self)
                    furthestErrLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestErr is not None:
            raise furthestErr
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other):
        """Support ``expr |= other``; a plain string is wrapped in a Literal first."""
        alternative = Literal(other) if isinstance(other, str) else other
        return self.append(alternative)  # MatchFirst( [ self, other ] )

    def __str__(self):
        """Return the assigned name, or the cached ``{a | b | ...}`` rendering."""
        if hasattr(self, "name"):
            return self.name

        rendered = self.strRepr
        if rendered is None:
            rendered = "{" + " | ".join(map(str, self.exprs)) + "}"
            self.strRepr = rendered

        return rendered

    def checkRecursion(self, parseElementList):
        """Run the recursion check on every alternative, with self appended to the trail."""
        trail = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(trail)
2442
2443
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """

    def __init__(self, exprs, savelist=True):
        """Store the sub-expressions; the Each may return empty only if all of them may."""
        super().__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # the optional/required grouping is computed lazily on the first parse
        self.initExprGroups = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the sub-expressions in whatever order the input presents them.

        A trial pass (tryParse) discovers the order in which expressions match;
        the expressions are then parsed for real in that order and their
        results merged.  Raises ParseException if a required expression never
        matched.
        """
        if self.initExprGroups:
            # one-time classification of the sub-expressions by wrapper type
            optionals, multioptionals, multirequired, required = [], [], [], []
            for e in self.exprs:
                if isinstance(e, Optional):
                    optionals.append(e.expr)
                if isinstance(e, ZeroOrMore):
                    multioptionals.append(e.expr)
                if isinstance(e, OneOrMore):
                    multirequired.append(e.expr)
                if not isinstance(e, (Optional, ZeroOrMore, OneOrMore)):
                    required.append(e)
            self.optionals = optionals
            self.multioptionals = multioptionals
            self.multirequired = multirequired
            self.required = required + multirequired
            self.initExprGroups = False

        trialLoc = loc
        pendingReqd = self.required[:]
        pendingOpt = self.optionals[:]
        matchOrder = []

        # keep sweeping over the candidates until a full sweep matches nothing
        while True:
            candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            failures = 0
            for e in candidates:
                try:
                    trialLoc = e.tryParse(instring, trialLoc)
                except ParseException:
                    failures += 1
                else:
                    matchOrder.append(e)
                    if e in pendingReqd:
                        pendingReqd.remove(e)
                    elif e in pendingOpt:
                        pendingOpt.remove(e)
            if failures == len(candidates):
                break

        if pendingReqd:
            missing = ", ".join([str(e) for e in pendingReqd])
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e, Optional) and e.expr in pendingOpt]

        # real parse, in the order discovered above
        parsedPieces = []
        for e in matchOrder:
            loc, piece = e._parse(instring, loc, doActions)
            parsedPieces.append(piece)

        merged = ParseResults([])
        for piece in parsedPieces:
            # names already present in the merged results get their values combined
            combined = {}
            for key in piece.keys():
                if key in merged.keys():
                    joined = ParseResults(merged[key])
                    joined += ParseResults(piece[key])
                    combined[key] = joined
            merged += ParseResults(piece)
            for key, value in combined.items():
                merged[key] = value
        return loc, merged

    def __str__(self):
        """Return the assigned name, or the cached ``{a & b & ...}`` rendering."""
        if hasattr(self, "name"):
            return self.name

        rendered = self.strRepr
        if rendered is None:
            rendered = "{" + " & ".join(map(str, self.exprs)) + "}"
            self.strRepr = rendered

        return rendered

    def checkRecursion(self, parseElementList):
        """Run the recursion check on every sub-expression, with self appended to the trail."""
        trail = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(trail)
2529
2530
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""

    def __init__(self, expr, savelist=False):
        """Wrap a single expression.

        A plain string is converted to a Literal.  When expr is not None, the
        wrapper copies the wrapped expression's parse settings (index-error and
        empty-match flags, whitespace handling, list saving, pre-parse flag)
        and inherits its ignore expressions.  expr may be None.
        """
        super().__init__(savelist)
        if isinstance(expr, str):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate parsing to the wrapped expression.

        Raises ParseException when no expression has been set.
        """
        if self.expr is None:
            raise ParseException("", loc, self.errmsg, self)
        return self.expr._parse(instring, loc, doActions, callPreParse=False)

    def leaveWhitespace(self):
        """Disable whitespace skipping on this element and on a copy of the wrapped expression.

        Returns self.
        """
        self.skipWhitespace = False
        if self.expr is not None:
            # Copy first so the originally supplied expression object is left
            # untouched.  The copy is made inside the None guard: copying
            # before the check raised AttributeError when no expression was set.
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register an ignorable expression here and on the wrapped expression.

        A Suppress that is already registered is not added again.  Returns self.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super().ignore(other)
        if self.expr is not None:
            # forward whatever the base class actually stored as the newest entry
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        """Streamline this element, then the wrapped expression.  Returns self."""
        super().streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise RecursiveGrammarException if self is already on the trail; otherwise descend."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList+[self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    def validate(self, validateTrace=None):
        """Validate the wrapped expression, then check this element for recursion."""
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        """Return the base-class string if available, else ``ClassName:(expr)`` (cached)."""
        try:
            return super().__str__()
        except Exception:
            # best-effort: fall back to the generated representation below
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = f"{self.__class__.__name__}:({str(self.expr)})"
        return self.strRepr
2604
2605
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  FollowedBy
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  FollowedBy always returns a null token list."""

    def __init__(self, expr):
        """Wrap expr; a lookahead consumes nothing, so it may always return empty."""
        super().__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Trial-parse the wrapped expression at loc and return loc unchanged with no tokens.

        Any exception raised by the trial parse propagates to the caller.
        """
        self.expr.tryParse(instring, loc)
        noTokens = []
        return loc, noTokens
2619
2620
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  NotAny
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, NotAny does *not* skip over leading whitespace. NotAny
    always returns a null token list.  May be constructed using the '~' operator."""

    def __init__(self, expr):
        """Wrap expr as a negative lookahead and prepare the failure message."""
        super().__init__(expr)
        self.skipWhitespace = False  # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+str(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed (loc unchanged, no tokens) only if the wrapped expression fails at loc."""
        try:
            self.expr.tryParse(instring, loc)
        except (ParseException, IndexError):
            # the unwanted expression did not match: the lookahead succeeds
            return loc, []

        # the unwanted expression matched: fail using this element's exception object
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__(self):
        """Return the assigned name, or the cached ``~{expr}`` rendering."""
        if hasattr(self, "name"):
            return self.name

        rendered = self.strRepr
        if rendered is None:
            rendered = "~{" + str(self.expr) + "}"
            self.strRepr = rendered

        return rendered
2654
2655
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""

    def __init__(self, expr):
        """Wrap expr; zero repetitions are allowed, so the result may be empty."""
        super().__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression repeatedly until it fails.

        Returns the location after the last successful repetition and the
        accumulated tokens (an empty list if nothing matched).
        """
        tokens = []
        skipIgnorables = bool(self.ignoreExprs)
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
            while True:
                # step over ignorable text between repetitions, when any is registered
                startAt = self._skipIgnorables(instring, loc) if skipIgnorables else loc
                loc, moreTokens = self.expr._parse(instring, startAt, doActions)
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException, IndexError):
            # the first failing repetition ends the loop; earlier matches are kept
            pass

        return loc, tokens

    def __str__(self):
        """Return the assigned name, or the cached ``[expr]...`` rendering."""
        if hasattr(self, "name"):
            return self.name

        rendered = self.strRepr
        if rendered is None:
            rendered = "[" + str(self.expr) + "]..."
            self.strRepr = rendered

        return rendered

    def setResultsName(self, name, listAllMatches=False):
        """Assign a results name via the base class and force list-style saving on the result."""
        named = super().setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2694
2695
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression once (mandatory), then repeatedly until it fails.

        A failure of the first repetition propagates to the caller; failures of
        later repetitions simply end the loop.
        """
        # must be at least one
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        skipIgnorables = bool(self.ignoreExprs)
        try:
            while True:
                # step over ignorable text between repetitions, when any is registered
                startAt = self._skipIgnorables(instring, loc) if skipIgnorables else loc
                loc, moreTokens = self.expr._parse(instring, startAt, doActions)
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException, IndexError):
            pass

        return loc, tokens

    def __str__(self):
        """Return the assigned name, or the cached ``{expr}...`` rendering."""
        if hasattr(self, "name"):
            return self.name

        rendered = self.strRepr
        if rendered is None:
            rendered = "{" + str(self.expr) + "}..."
            self.strRepr = rendered

        return rendered

    def setResultsName(self, name, listAllMatches=False):
        """Assign a results name via the base class and force list-style saving on the result."""
        named = super().setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2730
2731
2732class _NullToken:
2733    def __bool__(self):
2734        return False
2735    __nonzero__ = __bool__
2736
2737    def __str__(self):
2738        return ""
2739
2740
# Module-level sentinel used as the default of Optional's ``default`` argument;
# Optional compares against it by identity to tell "no default given" apart
# from any real default value.
_optionalNotMatched = _NullToken()
2742
2743
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return string can also be specified, if the optional expression
       is not found.
    """

    def __init__(self, exprs, default=_optionalNotMatched):
        """Wrap exprs and remember the default to return when it does not match."""
        super().__init__(exprs, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression, falling back to the default when it fails.

        On failure loc is returned unchanged.  The tokens are then empty when
        no default was given; otherwise they hold the default value, which is
        also stored under the wrapped expression's results name if it has one.
        """
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            if self.defaultValue is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([self.defaultValue])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [self.defaultValue]
        return loc, tokens

    def __str__(self):
        """Return the assigned name, or the cached ``[expr]`` rendering."""
        if hasattr(self, "name"):
            return self.name

        rendered = self.strRepr
        if rendered is None:
            rendered = "[" + str(self.expr) + "]"
            self.strRepr = rendered

        return rendered
2777
2778
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also consumed.  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.
    """

    def __init__(self, other, include=False, ignore=None):
        """Set up the target expression.

        When an ignore expression is given, it is registered on a copy of the
        target so the caller's expression object is not modified.
        """
        super().__init__(other)
        if ignore is not None:
            self.expr = self.expr.copy()
            self.expr.ignore(ignore)
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        self.errmsg = "No match found for "+str(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Advance through instring until the target expression matches.

        Returns the skipped text as a single token; with includeMatch the
        target is consumed too and its tokens (if any) are appended to the
        skipped text in one ParseResults.  Raises this element's exception
        when the end of the input is passed without a match.
        """
        startLoc = loc
        endOfInput = len(instring)
        target = self.expr
        while loc <= endOfInput:
            try:
                loc = target._skipIgnorables(instring, loc)
                # probe without parse actions; raises if the target is not here
                target._parse(instring, loc, doActions=False, callPreParse=False)
                if not self.includeMatch:
                    return loc, [instring[startLoc:loc]]

                skipped = instring[startLoc:loc]
                loc, matched = target._parse(instring, loc, doActions, callPreParse=False)
                if not matched:
                    return loc, [skipped]
                combined = ParseResults(skipped)
                combined += matched
                return loc, [combined]
            except (ParseException, IndexError):
                # no match at this position: move one character on and retry
                loc += 1

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2822
2823
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.

       Note: take care when assigning to Forward not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the Forward::
          fwdExpr << (a | b | c)
    """

    def __init__(self, other=None):
        """Create the placeholder, optionally already bound to an expression."""
        super().__init__(other, savelist=False)

    def __lshift__(self, other):
        """Bind other as the expression this Forward stands for.

        A plain string is wrapped in a Literal.  The parse settings of the
        bound expression are copied onto this element and its ignore
        expressions are inherited.  Returns None.
        """
        if isinstance(other, str):
            other = Literal(other)
        self.expr = other
        # drop any cached string form built for a previous binding
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars(self.expr.whiteChars)
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return None

    def leaveWhitespace(self):
        """Disable whitespace skipping on this element only; the bound expression is not touched."""
        self.skipWhitespace = False
        return self

    def streamline(self):
        """Streamline the bound expression once; the flag is set first so recursion terminates."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None):
        """Validate the bound expression (unless self is already on the trace), then check recursion."""
        if validateTrace is None:
            validateTrace = []
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        """Return the assigned name, or ``"Forward: "`` followed by the bound expression's string.

        While the bound expression is being rendered, this instance is
        temporarily re-classed as _ForwardNoRecurse so that a recursive
        reference back to it renders as "..." instead of recursing forever.
        """
        if hasattr(self, "name"):
            return self.name

        # Remember the real class so that subclasses of Forward are restored
        # correctly; resetting to the hard-coded Forward class would silently
        # strip a subclass instance of its type after the first str() call.
        originalClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = str(self.expr)
            else:
                retString = "None"
        finally:
            self.__class__ = originalClass
        return "Forward: "+retString

    def copy(self):
        """Copy this element; an unbound Forward yields a new Forward bound to self instead."""
        if self.expr is not None:
            return super().copy()
        else:
            ret = Forward()
            ret << self
            return ret
2897
2898
class _ForwardNoRecurse(Forward):
    """Forward variant whose string form is a fixed ellipsis.

    Forward.__str__ switches an instance to this class while rendering its
    bound expression, so a recursive reference prints as "...".
    """

    def __str__(self):
        placeholder = "..."
        return placeholder
2902
2903
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""

    def __init__(self, expr, savelist=False):
        """Wrap expr with list saving switched off.

        The savelist argument is accepted but not passed to the base class.
        """
        super().__init__(expr)  # , savelist )
        self.saveAsList = False
2910
2911
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens."""

    def __init__(self, *args):
        """Construct the converter and emit a DeprecationWarning pointing at upcaseTokens."""
        super().__init__(*args)
        warnings.warn(
            "Upcase class is deprecated, use upcaseTokens parse action instead",
            DeprecationWarning,
            stacklevel=2)

    def postParse(self, instring, loc, tokenlist):
        """Return a list holding the upper-cased form of every token in tokenlist."""
        # call the str method directly: the Python 2 ``string.upper`` function
        # no longer exists in Python 3's string module
        return [tok.upper() for tok in tokenlist]
2924
2925
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying 'adjacent=False' in the constructor.
    """

    def __init__(self, expr, joinString="", adjacent=True):
        """Wrap expr; joinString and adjacent are stored for postParse and ignore."""
        super().__init__(expr)
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString

    def ignore(self, other):
        """Register an ignorable expression.

        In adjacent mode it is registered on this element only (via
        ParserElement.ignore); otherwise the inherited ignore is used.
        Returns self.
        """
        if not self.adjacent:
            super().ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the parsed tokens with one concatenated string token.

        The result is wrapped in a list when this element has a results name
        and the combined results carry named entries.
        """
        # start from a copy emptied of its list items, then add the joined text
        combined = tokenlist.copy()
        del combined[:]
        joinedText = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([joinedText], modal=self.modalResults)

        if self.resultsName and len(combined.keys()) > 0:
            return [combined]
        return combined
2957
2958
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""

    def __init__(self, expr):
        """Wrap expr with list saving switched on."""
        super().__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Nest the parsed tokens one level deeper by wrapping them in a list."""
        grouped = [tokenlist]
        return grouped
2968
2969
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """

    def __init__(self, exprs):
        """Wrap exprs with list saving switched on."""
        super().__init__(exprs)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Add a keyed entry to tokenlist for every non-empty item in it.

        The first token of an item is the key (an int key is converted to a
        stripped string).  The stored value is "" for a one-token item, the
        second token for a plain two-token item, and otherwise the remaining
        tokens (unwrapped when a single unnamed token is left).  The result is
        wrapped in a list when this element has a results name.
        """
        for position, entry in enumerate(tokenlist):
            if len(entry) == 0:
                continue

            key = entry[0]
            if isinstance(key, int):
                key = str(entry[0]).strip()

            if len(entry) == 1:
                value = ""
            elif len(entry) == 2 and not isinstance(entry[1], ParseResults):
                value = entry[1]
            else:
                remainder = entry.copy()  # ParseResults(i)
                del remainder[0]
                if len(remainder) != 1 or (isinstance(remainder, ParseResults) and remainder.keys()):
                    value = remainder
                else:
                    value = remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        return [tokenlist] if self.resultsName else tokenlist
3003
3004
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""

    def postParse(self, instring, loc, tokenlist):
        """Discard the parsed tokens by returning a new empty list."""
        return list()

    def suppress(self):
        """Return self unchanged: this element already suppresses its results."""
        return self
3013
3014
class OnlyOnce:
    """Wrapper for parse actions, to ensure they are only called once."""

    def __init__(self, methodCall):
        """Store the normalized parse action and mark it as not yet called."""
        self.called = False
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)

    def __call__(self, s, l, t):
        """Run the wrapped action on the first call; raise ParseException on later calls.

        The action is only marked as called once it has returned, so an action
        that raises can be attempted again.
        """
        if self.called:
            raise ParseException(s, l, "")
        outcome = self.callable(s, l, t)
        self.called = True
        return outcome

    def reset(self):
        """Allow the wrapped action to be called once more."""
        self.called = False
3031
3032
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes a line to stderr when the action is entered
    and another when it leaves, showing either the return value or the
    exception raised (which is re-raised unchanged).  The wrapper takes the
    wrapped action's name where it has one.
    """
    f = ParserElement._normalizeParseActionArgs(f)
    # Functions expose their name as __name__ (func_name was Python 2 only);
    # fall back to the class name for callables that carry no __name__.
    actionName = getattr(f, "__name__", f.__class__.__name__)

    def z(*paArgs):
        thisFunc = actionName
        # the last three positional arguments are (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is taken to be the owning instance
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write(">>entering %s(line: '%s', %d, %s)\n" % (thisFunc, line(l, s), l, t))
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write(f"<<leaving {thisFunc} (exception: {exc})\n")
            raise
        sys.stderr.write(f"<<leaving {thisFunc} (ret: {ret})\n")
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
3055
3056#
3057# global helpers
3058#
3059
3060
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing 'combine=True' in the constructor.
       If combine is set to True, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    # display name of the form "expr [delim expr]..."
    dlName = f"{expr!s} [{delim!s} {expr!s}]..."
    if not combine:
        return (expr + ZeroOrMore(Suppress(delim) + expr)).setName(dlName)
    return Combine(expr + ZeroOrMore(delim + expr)).setName(dlName)
3074
3075
def countedArray(expr):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # re-bind the Forward to exactly `count` copies of expr as soon as the
        # count is known; a zero count selects a group around `empty`
        count = int(t[0])
        arrayExpr << (count and Group(And([expr] * count)) or Group(empty))
        # returning no tokens drops the count itself from the results
        return []

    countField = Word(nums).setName("arrayLen").setParseAction(countFieldParseAction, callDuringTry=True)
    return (countField + arrayExpr)
3090
3091
3092def _flatten(L):
3093    if not isinstance(L, list):
3094        return [L]
3095    if L == []:
3096        return L
3097    return _flatten(L[0]) + _flatten(L[1:])
3098
3099
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches a
       previous literal, will also match the leading "1:1" in "1:10".
       If this is not desired, use matchPreviousExpr.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # re-bind the repeater to whatever expr has just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten t tokens
            flatTokens = _flatten(t.asList())
            rep << And([Literal(tok) for tok in flatTokens])

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3126
3127
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches by
       expressions, will *not* match the leading "1:1" in "1:10";
       the expressions are evaluated first, and then compared, so
       "1" is compared with "10".
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with its own copy of expr
    rep << expr.copy()

    def copyTokenToRepeater(s, l, t):
        # remember what expr matched, then make the repeater insist on the same tokens
        expectedTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            if _flatten(t.asList()) != expectedTokens:
                raise ParseException("", 0, "")

        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3155
3156
3157def _escapeRegexRangeChars(s):
3158    # ~  escape these chars: ^-]
3159    for c in r"\^-]":
3160        s = s.replace(c, "\\"+c)
3161    s = s.replace("\n", r"\n")
3162    s = s.replace("\t", r"\t")
3163    return str(s)
3164
3165
def oneOf(strs, caseless=False, useRegex=True):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list or tuple of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       An argument of any other type produces a SyntaxWarning and is treated
       as an empty set of symbols.
    """
    if caseless:
        isequal = (lambda a, b: a.upper() == b.upper())
        masks = (lambda a, b: b.upper().startswith(a.upper()))
        parseElementClass = CaselessLiteral
    else:
        isequal = (lambda a, b: a == b)
        masks = (lambda a, b: b.startswith(a))
        parseElementClass = Literal

    if isinstance(strs, (list, tuple)):
        # always work on a private, mutable list: the reordering below deletes
        # and inserts in place, which a tuple does not support
        symbols = list(strs)
    elif isinstance(strs, str):
        symbols = strs.split()
    else:
        warnings.warn(
            "Invalid argument to oneOf, expected string or list",
            SyntaxWarning,
            stacklevel=2)
        # carry on with no symbols instead of failing on an unbound name below
        symbols = []

    # drop duplicates, and move any symbol that is masked by an earlier prefix
    # (e.g. "<=" after "<") in front of that prefix so the longer one is tried first
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i+1:]):
            if isequal(other, cur):
                del symbols[i+j+1]
                break
            elif masks(cur, other):
                del symbols[i+j+1]
                symbols.insert(i, other)
                cur = other
                break
        else:
            # nothing changed for this position; move on to the next symbol
            i += 1

    if not caseless and useRegex:
        # ~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
        try:
            if len(symbols) == len("".join(symbols)):
                # every symbol is a single character: a character class suffices
                return Regex("[%s]" % "".join([_escapeRegexRangeChars(sym) for sym in symbols]))
            else:
                return Regex("|".join([re.escape(sym) for sym in symbols]))
        except Exception:
            warnings.warn(
                "Exception creating Regex for oneOf, building MatchFirst",
                SyntaxWarning,
                stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst([parseElementClass(sym) for sym in symbols])
3227
3228
def dictOf(key, value):
    """Helper to build a dictionary expression from a key pattern and a value pattern.

       Each key/value pair is wrapped in a Group, the groups are repeated with
       ZeroOrMore, and the whole repetition is wrapped in a Dict.  The key pattern
       may contain delimiters or punctuation as long as they are suppressed, so that
       only the significant key text remains.  The value pattern may carry named
       results, which then appear as named fields in the Dict results.
    """
    entry = Group(key + value)
    entries = ZeroOrMore(entry)
    return Dict(entries)
3238
3239
# convenience constants for positional expressions
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")

# grammar for the "[...]" range strings accepted by srange()
# a backslash followed by one punctuation character -> that character
_escapedPunc = Word(_bslash, r"\[]-*.$+^?()~ ", exact=2).setParseAction(
    lambda strg, locn, toks: toks[0][1])
_printables_less_backslash = "".join(c for c in printables if c not in r"\]")
# backslash + "0x" + hex digits -> the character with that code
_escapedHexChar = Combine(Suppress(_bslash + "0x") + Word(hexnums)).setParseAction(
    lambda strg, locn, toks: chr(int(toks[0], 16)))
# backslash + "0" + octal digits -> the character with that code
_escapedOctChar = Combine(Suppress(_bslash) + Word("0", "01234567")).setParseAction(
    lambda strg, locn, toks: chr(int(toks[0], 8)))
_singleChar = (
    _escapedPunc
    | _escapedHexChar
    | _escapedOctChar
    | Word(_printables_less_backslash, exact=1)
)
# "a-z" style range, grouped so it can be told apart from a single character
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = (
    Literal("[")
    + Optional("^").setResultsName("negate")
    + Group(OneOrMore(_charRange | _singleChar)).setResultsName("body")
    + "]"
)
3254
3255
def _expanded(p):
    """Expand one parsed element of a bracket expression into its characters.

       A ParseResults element (a grouped character range) is treated as a
       (first, last) pair and expanded to every character from first through
       last inclusive.  Any other element is returned unchanged.
    """
    if not isinstance(p, ParseResults):
        return p
    # An explicit branch instead of "cond and a or b": a reversed range expands
    # to "", which the and/or idiom would treat as false and return p itself.
    return ''.join([chr(c) for c in range(ord(p[0]), ord(p[1])+1)])
3258
3259
def srange(s):
    r"""Helper to build the character strings used when constructing a Word.  The
       syntax is borrowed from regexp '[]' character sets::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The argument must be wrapped in []'s; the result is the expanded set of
       characters joined into one string.
       Inside the []'s you may use::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
       If the string cannot be parsed or expanded, an empty string is returned.
    """
    try:
        parsed = _reBracketExpr.parseString(s)
        pieces = (_expanded(part) for part in parsed.body)
        return "".join(pieces)
    except Exception:
        return ""
3280
3281
def matchOnlyAtCol(n):
    """Helper that returns a parse action which only accepts a match located at
       column n of the input text; a match at any other column raises a
       ParseException.
    """
    def verifyCol(strg, locn, toks):
        if col(locn, strg) == n:
            return
        raise ParseException(strg, locn, "matched token not at column %d" % n)

    return verifyCol
3290
3291
def replaceWith(replStr):
    """Helper that returns a parse action which ignores its arguments and always
       returns a one-element list holding replStr.  Especially useful together
       with transformString().
    """
    def _replFunc(*_unused):
        # build a new list on every call so callers never share one instance
        return [replStr]

    return _replFunc
3299
3300
def removeQuotes(s, l, t):
    """Helper parse action that strips the first and last character (the quotation
       marks) from the first parsed token.  Attach it to a quoted string with::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    return quoted[1:-1]
3307
3308
def upcaseTokens(s, l, t):
    """Helper parse action that returns every token converted to a str in upper case."""
    return [str(tok).upper() for tok in t]
3312
3313
def downcaseTokens(s, l, t):
    """Helper parse action that returns every token converted to a str in lower case."""
    return [str(tok).lower() for tok in t]
3317
3318
def keepOriginalText(s, startLoc, t):
    """Helper parse action that replaces the parsed tokens with the original
       input text they were matched from, overriding the output of any nested
       parse actions.  The token list t is modified in place and returned."""
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    # empty the existing tokens, then refill with the raw slice of the input
    del t[:]
    originalText = ParseResults(s[startLoc:endloc])
    t += originalText
    return t
3329
3330
def getTokensEndLoc():
    """Method to be called from within a parse action to find the end location
       of the parsed tokens.  The value is read from the local variable "loc" of
       the nearest enclosing "_parseNoCache" stack frame; a ParseFatalException
       is raised when no such frame is found."""
    import inspect
    fstack = inspect.stack()
    try:
        # skip this function and its direct caller, then walk outward (through
        # intervening argument normalizers) looking for the parsing routine
        for frameRecord in fstack[2:]:
            if frameRecord[3] != "_parseNoCache":
                continue
            return frameRecord[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # release the captured frame records
        del fstack
3346
3347
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

       tagStr may be a string (converted to a Keyword, caseless unless xml is true)
       or an expression with a name attribute.  Returns (openTag, closeTag); their
       results names are "start" and "end" followed by the title-cased tag name
       with any ':' separators removed.
    """
    if isinstance(tagStr, str):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums+"_-:")
    if xml:
        # XML: every attribute has a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes)
        attrEntry = Group(tagAttrName + Suppress("=") + tagAttrValue)
    else:
        # HTML: names are lower-cased; values are optional and may be unquoted
        printablesLessRAbrack = "".join(c for c in printables if c not in ">")
        tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printablesLessRAbrack)
        attrEntry = Group(tagAttrName.setParseAction(downcaseTokens) + Optional(Suppress("=") + tagAttrValue))

    # the "empty" result is True when the tag ends with "/>" and False otherwise
    emptyFlag = Optional("/", default=[False]).setResultsName("empty").setParseAction(lambda s, l, t: t[0] == '/')
    openTag = Suppress("<") + tagStr + Dict(ZeroOrMore(attrEntry)) + emptyFlag + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    nameSuffix = "".join(resname.replace(":", " ").title().split())
    openTag = openTag.setResultsName("start" + nameSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end" + nameSuffix).setName("</%s>" % tagStr)

    return openTag, closeTag
3374
3375
def makeHTMLTags(tagStr):
    """Helper that returns the (opening, closing) tag expressions for an HTML tag name."""
    return _makeTags(tagStr, xml=False)
3379
3380
def makeXMLTags(tagStr):
    """Helper that returns the (opening, closing) tag expressions for an XML tag name."""
    return _makeTags(tagStr, xml=True)
3384
3385
def withAttribute(*args, **attrDict):
    """Helper to create a validating parse action for start tags created with
       makeXMLTags or makeHTMLTags. Use withAttribute to require particular
       attribute values on a starting tag, to avoid false matches on common tags
       such as <TD> or <DIV>.

       Give the required attribute names and values either as:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       Attribute names with a namespace prefix must use the second form.  When
       positional tuples are given, keyword arguments are ignored.  Names are
       compared exactly against the keys of the parsed tokens; start tags from
       makeHTMLTags store attribute names in lower case.

       To require only that an attribute is present, whatever its value, pass
       withAttribute.ANY_VALUE as the value.

       The returned parse action raises ParseException when a required attribute
       is missing or has a different value.
       """
    # unpack each entry now so a malformed (non-pair) entry fails here, not at parse time
    attrs = [(name, value) for name, value in (args or attrDict.items())]

    def pa(s, l, tokens):
        for attrName, attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s, l, "no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                if tokens[attrName] != attrValue:
                    raise ParseException(s, l, f"attribute '{attrName}' has value '{tokens[attrName]}', must be '{attrValue}'")

    return pa


# sentinel meaning "the attribute must exist, with any value"
withAttribute.ANY_VALUE = object()
3418
# associativity markers for operatorPrecedence(); each is a unique sentinel object
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
3422
3423
def operatorPrecedence(baseExpr, opList):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary,
       binary, or ternary, left- or right-associative.  Parse actions can also
       be attached to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic operand of the nested
          expression grammar; the complete grammar enclosed in parentheses is
          accepted wherever baseExpr is
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Returns a Forward expression for the complete grammar.

       Raises ValueError if numTerms is not 1, 2, or 3, if numTerms is 3 and
       opExpr is not a pair of expressions, or if rightLeftAssoc is neither
       opAssoc.LEFT nor opAssoc.RIGHT.
    """
    ret = Forward()
    # the innermost level: a bare operand, or the whole grammar in parentheses
    lastExpr = baseExpr | (Suppress('(') + ret + Suppress(')'))
    for operDef in opList:
        # pad with None so a 3-tuple (no parse action) unpacks too; operDef must be a tuple
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        # placeholder for this precedence level, so right-associative forms can refer to themselves
        thisExpr = Forward()  # .setName("expr%d" % i)
        if rightLeftAssoc == opAssoc.LEFT:
            # each form is guarded by a FollowedBy lookahead before the Group is attempted
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group(lastExpr + OneOrMore(opExpr))
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group(lastExpr + OneOrMore(opExpr + lastExpr))
                else:
                    # opExpr of None: operands are simply adjacent, with no operator token
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group(lastExpr + OneOrMore(lastExpr))
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                    Group(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr)
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            # right-associative forms recurse through thisExpr on the right-hand side
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group(lastExpr + OneOrMore(opExpr + thisExpr))
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group(lastExpr + OneOrMore(thisExpr))
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                    Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction(pa)
        # this level matches its operator form, or falls back to the previous level
        thisExpr << (matchExpr | lastExpr)
        lastExpr = thisExpr
    # the last level defined becomes the complete grammar
    ret << lastExpr
    return ret
3494
3495
# Quoted-string expressions.  Inside the quotes each pattern accepts any character
# other than the quote, newline, carriage return or backslash, plus a doubled
# quote character, a \x hex escape, or a backslash followed by any one character.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# either of the two forms above, as a single regex
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a quoted string with a leading 'u', combined into one token
unicodeString = Combine(_L('u') + quotedString.copy())
3500
3501
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       When no content expression is given, opener and closer must both be
       strings, and the nested expression captures all whitespace-delimited
       content between the delimiters as a list of separate values.

       Use ignoreExpr for expressions that may contain opening or closing
       characters which should not count as nesting delimiters, such as
       quotedString or a comment expression.  Specify multiple expressions
       using an Or or MatchFirst.  Pass None if nothing is to be ignored.

       Raises ValueError if opener equals closer, or if content is None and
       opener or closer is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not (isinstance(opener, str) and isinstance(closer, str)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        if ignoreExpr is None:
            content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS).setParseAction(lambda toks: toks[0].strip()))
        else:
            # take one character at a time so ignoreExpr is re-checked at every position
            content = (Combine(OneOrMore(~ignoreExpr + CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS, exact=1)))
                       .setParseAction(lambda toks: toks[0].strip()))

    ret = Forward()
    if ignoreExpr is None:
        ret << Group(Suppress(opener) + ZeroOrMore(ret | content) + Suppress(closer))
    else:
        ret << Group(Suppress(opener) + ZeroOrMore(ignoreExpr | ret | content) + Suppress(closer))
    return ret
3540
3541
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one blockStatement.  As a side
       effect, blockStatementExpr is set to ignore backslash line continuations.
    """
    def checkPeerIndent(s, l, t):
        # nothing to check at end of input
        if l >= len(s):
            return
        curCol = col(l, s)
        expectedCol = indentStack[-1]
        if curCol == expectedCol:
            return
        if curCol > expectedCol:
            raise ParseFatalException(s, l, "illegal nesting")
        raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        curCol = col(l, s)
        if curCol <= indentStack[-1]:
            raise ParseException(s, l, "not a subentry")
        # deeper than the enclosing level: record the new indentation column
        indentStack.append(curCol)

    def checkUnindent(s, l, t):
        if l >= len(s):
            return
        curCol = col(l, s)
        isUnindent = indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]
        if not isUnindent:
            raise ParseException(s, l, "not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    # one or more statements at the same indentation, each optionally followed by newlines
    statements = OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
    if indent:
        smExpr = Group(Optional(NL) + FollowedBy(blockStatementExpr) + INDENT + statements + UNDENT)
    else:
        smExpr = Group(Optional(NL) + statements)
    blockStatementExpr.ignore("\\" + LineEnd())
    return smExpr
3594
3595
# 8-bit character sets built with srange(): alphabetic characters with codes
# 0xc0-0xff (excluding 0xd7 and 0xf7), and punctuation with codes 0xa1-0xbf
# plus 0xd7 and 0xf7
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# opening and closing tag expressions that match any HTML tag name
anyOpenTag, anyCloseTag = makeHTMLTags(Word(alphas, alphanums+"_:"))
# "&name;" for a handful of common entities; the name is saved as "entity"
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") + ";")
# replacement character for each entity name; &quot; is the double-quote
# character, not an apostrophe
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(), '><& "'))
3602
3603
def replaceHTMLEntity(t):
    """Helper parse action that returns the replacement character for the entity
       name stored in t.entity, or None when the name is not in _htmlEntityMap."""
    return _htmlEntityMap.get(t.entity)
3606
3607
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# /* ... */ comment, ending at the first "*/"
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# <!-- ... --> comment; [\s\S] lets the body span multiple lines
htmlComment = Regex(r"<!--[\s\S]*?-->")
# everything up to the end of the current line, without skipping leading whitespace
restOfLine = Regex(r".*").leaveWhitespace()
# // comment; a backslash-newline pair continues the comment onto the next line
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# either a /* ... */ comment or a // comment, in a single regex
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# javaStyleComment is the same expression object as cppStyleComment
javaStyleComment = cppStyleComment
# '#' through the end of the line
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# every printable character except the comma
_noncomma = "".join([c for c in printables if c != ","])
# one unquoted list item: runs of non-comma characters, keeping embedded spaces/tabs
# unless they are followed by a comma or the end of the line
_commasepitem = Combine(OneOrMore(Word(_noncomma) + Optional(Word(" \t") + ~Literal(",") + ~LineEnd()))).streamline().setName("commaItem")
# comma-delimited list of quoted strings or plain items; an omitted item yields ""
commaSeparatedList = delimitedList(Optional(quotedString | _commasepitem, default="")).setName("commaSeparatedList")
3621
3622
if __name__ == "__main__":

    def test(teststring):
        """Parse teststring with simpleSQL and print either the parsed results
           or the location and text of the parse error."""
        try:
            tokens = simpleSQL.parseString(teststring)
            tokenlist = tokens.asList()
            print(teststring + "->" + str(tokenlist))
            for label, value in (
                    ("tokens = ", tokens),
                    ("tokens.columns = ", tokens.columns),
                    ("tokens.tables = ", tokens.tables)):
                print(label + str(value))
            print(tokens.asXML("SQL", True))
        except ParseBaseException as err:
            print(teststring + "->")
            print(err.line)
            # caret under the column where parsing failed
            print(" "*(err.column-1) + "^")
            print(err)
        print()

    # a minimal "select <columns> from <tables>" grammar
    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName))  # .setName("columns")
    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName))  # .setName("tables")
    simpleSQL = (
        selectToken
        + ('*' | columnNameList).setResultsName("columns")
        + fromToken
        + tableNameList.setResultsName("tables"))

    # valid statements followed by deliberately malformed ones
    for sqlText in (
            "SELECT * from XYZZY, ABC",
            "select * from SYS.XYZZY",
            "Select A from Sys.dual",
            "Select AA,BB,CC from Sys.dual",
            "Select A, B, C from Sys.dual",
            "Select A, B, C from Sys.dual",
            "Xelect A, B, C from Sys.dual",
            "Select A, B, C frox Sys.dual",
            "Select",
            "Select ^^^ frox Sys.dual",
            "Select A, B, C from Sys.dual, Table2   "):
        test(sqlText)
3666