1# module pyparsing.py
2#
3# Copyright (c) 2003-2018  Paul T. McGuire
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23#
24
25__doc__ = \
26"""
27pyparsing module - Classes and methods to define and execute parsing grammars
28=============================================================================
29
30The pyparsing module is an alternative approach to creating and executing simple grammars,
31vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
32don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
33provides a library of classes that you use to construct the grammar directly in Python.
34
35Here is a program to parse "Hello, World!" (or any greeting of the form
36C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements
37(L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
38L{Literal} expressions)::
39
40    from pyparsing import Word, alphas
41
42    # define grammar of a greeting
43    greet = Word(alphas) + "," + Word(alphas) + "!"
44
45    hello = "Hello, World!"
46    print (hello, "->", greet.parseString(hello))
47
48The program outputs the following::
49
50    Hello, World! -> ['Hello', ',', 'World', '!']
51
52The Python representation of the grammar is quite readable, owing to the self-explanatory
53class names, and the use of '+', '|' and '^' operators.
54
55The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
56object with named attributes.
57
58The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
59 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
60 - quoted strings
61 - embedded comments
62
63
64Getting Started -
65-----------------
66Visit the classes L{ParserElement} and L{ParseResults} to see the base classes that most other pyparsing
67classes inherit from. Use the docstrings for examples of how to:
68 - construct literal match expressions from L{Literal} and L{CaselessLiteral} classes
69 - construct character word-group expressions using the L{Word} class
70 - see how to create repetitive expressions using L{ZeroOrMore} and L{OneOrMore} classes
71 - use L{'+'<And>}, L{'|'<MatchFirst>}, L{'^'<Or>}, and L{'&'<Each>} operators to combine simple expressions into more complex ones
72 - associate names with your parsed results using L{ParserElement.setResultsName}
73 - find some helpful expression short-cuts like L{delimitedList} and L{oneOf}
74 - find more useful common expressions in the L{pyparsing_common} namespace class
75"""
76
# Release identification strings for this copy of the module.
__version__ = "2.2.1"
# Timestamp string that accompanies __version__.
__versionTime__ = "18 Sep 2018 00:49 UTC"
# Author name and contact address.
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
80
81import string
82from weakref import ref as wkref
83import copy
84import sys
85import warnings
86import re
87import sre_constants
88import collections
89import pprint
90import traceback
91import types
92from datetime import datetime
93
94try:
95    from _thread import RLock
96except ImportError:
97    from threading import RLock
98
99try:
100    # Python 3
101    from collections.abc import Iterable
102    from collections.abc import MutableMapping
103except ImportError:
104    # Python 2.7
105    from collections import Iterable
106    from collections import MutableMapping
107
108try:
109    from collections import OrderedDict as _OrderedDict
110except ImportError:
111    try:
112        from ordereddict import OrderedDict as _OrderedDict
113    except ImportError:
114        _OrderedDict = None
115
116#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
117
# Public names of this module: a star-import (``from <module> import *``)
# binds exactly the names listed here.  The list holds classes first,
# then lower-case helpers/constants, then later additions.
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
'CloseMatch', 'tokenMap', 'pyparsing_common',
]
138
# (major, minor, micro) of the running interpreter
system_version = tuple(sys.version_info)[:3]
# True only when the interpreter's major version is exactly 3
PY_3 = system_version[0] == 3
if PY_3:
    # Python 3: alias the Python 2-only names used in this module to their
    # Python 3 equivalents so the rest of the code can use one spelling.
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    _ustr = str

    # build list of single arg builtins, that can be used as parse actions
    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]

else:
    # Python 2: use the native max-int constant and rebind range to xrange
    _MAX_INT = sys.maxint
    range = xrange

    def _ustr(obj):
        """Drop-in replacement for str(obj) that tries to be Unicode friendly.

           A C{unicode} object is returned unchanged. Otherwise C{str(obj)} is tried
           first. If that raises C{UnicodeEncodeError}, C{unicode(obj)} is encoded with
           the default encoding using the C{'xmlcharrefreplace'} error handler, and each
           resulting C{&#NNN;} reference is rewritten as C{\\u} followed by the hex
           digits of C{NNN}.
        """
        if isinstance(obj,unicode):
            return obj

        try:
            # If this works, then _ustr(obj) has the same behaviour as str(obj), so
            # it won't break any existing code.
            return str(obj)

        except UnicodeEncodeError:
            # Else encode it
            ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            # NOTE(review): Regex is not defined in this part of the file; it is only
            # looked up when this branch actually runs - presumably defined further down.
            xmlcharref = Regex(r'&#\d+;')
            # t[0] is the matched '&#NNN;' text; strip '&#' and ';', convert NNN to hex
            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return xmlcharref.transformString(ret)

    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
    singleArgBuiltins = []
    import __builtin__
    for fname in "sum len sorted reversed list tuple set any all min max".split():
        try:
            singleArgBuiltins.append(getattr(__builtin__,fname))
        except AttributeError:
            # this builtin does not exist in the running interpreter; skip it
            continue

# type object of a generator expression, for isinstance() checks against generators
_generatorType = type((y for y in range(1)))
184
def _xml_escape(data):
    """Return C{data} with &, >, <, " and ' replaced by their XML entity references."""

    # '&' has to come first: the entities substituted afterwards contain
    # ampersands of their own, which must not be escaped a second time
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    escaped = data
    for symbol, entity in replacements:
        escaped = escaped.replace(symbol, entity)
    return escaped
194
class _Constants(object):
    """Empty placeholder class; it defines no attributes or methods of its own."""
197
# Character-set constants: plain strings containing the characters of each class.
alphas = "".join((string.ascii_uppercase, string.ascii_lowercase))
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
# a single backslash character
_bslash = "\\"
# every character of string.printable that is not whitespace
printables = "".join(filter(lambda ch: ch not in string.whitespace, string.printable))
204
class ParseBaseException(Exception):
    """Common base class of the exceptions raised while parsing."""

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        """
        Store the exception details.

        When C{msg} is omitted, C{pstr} is taken to be the message and the
        stored input string is empty; otherwise C{pstr} is the input string
        and C{msg} the message.  C{loc} is the character location and
        C{elem} is kept as C{parserElement}.
        """
        self.loc = loc
        if msg is not None:
            self.msg = msg
            self.pstr = pstr
        else:
            self.msg = pstr
            self.pstr = ""
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        Internal factory: build an exception of type C{cls} from the fields of
        another parse exception, so that subclasses never have to agree on an
        C{__init__} signature.
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__(self, aname):
        """Attributes computed on demand from C{loc} and C{pstr}:
            - lineno - line number of the exception text
            - col (or column) - column number of the exception text
            - line - the line containing the exception text
           Any other name raises C{AttributeError}.
        """
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        if aname in ("col", "column"):
            return col(self.loc, self.pstr)
        if aname == "line":
            return line(self.loc, self.pstr)
        raise AttributeError(aname)

    def __str__(self):
        details = (self.msg, self.loc, self.lineno, self.column)
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Return the input line of the exception, stripped of surrounding
           whitespace, with C{markerString} inserted at the exception column
           (nothing is inserted when C{markerString} is empty).
        """
        text = self.line
        insert_at = self.column - 1
        if markerString:
            text = text[:insert_at] + markerString + text[insert_at:]
        return text.strip()

    def __dir__(self):
        # advertise the computed attributes in addition to the class attributes
        return ["lineno", "col", "line"] + dir(type(self))
260
class ParseException(ParseBaseException):
    """
    Exception raised when a parse expression does not match the input text.

    Attributes available by name (see L{ParseBaseException}):
     - lineno - the line number of the exception text
     - col - the column number of the exception text
     - line - the line containing the exception text

    Example::
        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::
        Expected integer (at char 0), (line:1, col:1)
        column: 1
    """
281
class ParseFatalException(ParseBaseException):
    """Exception that user code may raise when it finds inconsistent parse
       content; it stops all parsing immediately."""
286
class ParseSyntaxException(ParseFatalException):
    """Same as L{ParseFatalException}, but raised internally when an
       L{ErrorStop<And._ErrorStop>} ('-' operator) signals that parsing must stop
       immediately because a syntax error was found that cannot be backtracked over."""
292
293#~ class ReparseException(ParseBaseException):
294    #~ """Experimental class - parse actions can raise this exception to cause
295       #~ pyparsing to reparse the input string:
296        #~ - with a modified input string, and/or
297        #~ - with a modified start location
298       #~ Set the values of the ReparseException in the constructor, and raise the
299       #~ exception in a parse action to cause pyparsing to use the new string/location.
300       #~ Setting the values as None causes no change to be made.
301       #~ """
302    #~ def __init_( self, newstring, restartLoc ):
303        #~ self.newParseText = newstring
304        #~ self.reparseLoc = restartLoc
305
class RecursiveGrammarException(Exception):
    """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        """Keep C{parseElementList} as C{parseElementTrace}, the elements reported in the message."""
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple: a bare tuple on the right of '%' would be
        # unpacked as the format arguments (wrong output for one element, a
        # TypeError for several) instead of being printed as the trace.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
313
class _ParseResultsWithOffset(object):
    """Holds a value together with an offset as the 2-tuple C{tup}; indexing
       reads from that tuple and C{repr} shows only the value."""

    def __init__(self, p1, p2):
        self.tup = p1, p2

    def __getitem__(self, i):
        pair = self.tup
        return pair[i]

    def __repr__(self):
        value = self.tup[0]
        return repr(value)

    def setOffset(self, i):
        """Replace the second element (the offset), keeping the value."""
        value = self.tup[0]
        self.tup = (value, i)
323
324class ParseResults(object):
325    """
326    Structured parse results, to provide multiple means of access to the parsed data:
327       - as a list (C{len(results)})
328       - by list index (C{results[0], results[1]}, etc.)
329       - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
330
331    Example::
332        integer = Word(nums)
333        date_str = (integer.setResultsName("year") + '/'
334                        + integer.setResultsName("month") + '/'
335                        + integer.setResultsName("day"))
336        # equivalent form:
337        # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
338
339        # parseString returns a ParseResults object
340        result = date_str.parseString("1999/12/31")
341
342        def test(s, fn=repr):
343            print("%s -> %s" % (s, fn(eval(s))))
344        test("list(result)")
345        test("result[0]")
346        test("result['month']")
347        test("result.day")
348        test("'month' in result")
349        test("'minutes' in result")
350        test("result.dump()", str)
351    prints::
352        list(result) -> ['1999', '/', '12', '/', '31']
353        result[0] -> '1999'
354        result['month'] -> '12'
355        result.day -> '31'
356        'month' in result -> True
357        'minutes' in result -> False
358        result.dump() -> ['1999', '/', '12', '/', '31']
359        - day: 31
360        - month: 12
361        - year: 1999
362    """
363    def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
364        if isinstance(toklist, cls):
365            return toklist
366        retobj = object.__new__(cls)
367        retobj.__doinit = True
368        return retobj
369
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
        """
        Set up the token list and, when a results name is given, register the
        tokens under that name.

         - toklist - a list (shallow-copied), a generator (materialized into a
           list), or any other object (wrapped in a one-element list); C{None}
           means an empty list
         - name - results name; ignored when C{None} or otherwise falsy; an
           C{int} name is converted with C{_ustr}
         - asList - when true, the named value is stored as a C{ParseResults};
           when false, the first token (or C{toklist} itself if it cannot be
           indexed) is stored
         - modal - when false, C{name} is recorded in the accumulating-names
           table, which makes lookups by that name return every occurrence
           rather than only the last one (see C{__getitem__})
         - isinstance - the builtin bound as a default argument
           (NOTE(review): presumably a lookup-speed optimization; not meant to
           be passed by callers)
        """
        # __doinit is True only for objects freshly allocated by __new__; an
        # existing instance returned unchanged by __new__ skips this setup
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            # nothing is registered under the name when toklist is None, '' or []
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    # label the stored sub-result with the same name
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        # toklist could not be indexed; store it whole
                        self[name] = toklist
410
411    def __getitem__( self, i ):
412        if isinstance( i, (int,slice) ):
413            return self.__toklist[i]
414        else:
415            if i not in self.__accumNames:
416                return self.__tokdict[i][-1][0]
417            else:
418                return ParseResults([ v[0] for v in self.__tokdict[i] ])
419
420    def __setitem__( self, k, v, isinstance=isinstance ):
421        if isinstance(v,_ParseResultsWithOffset):
422            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
423            sub = v[0]
424        elif isinstance(k,(int,slice)):
425            self.__toklist[k] = v
426            sub = v
427        else:
428            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
429            sub = v
430        if isinstance(sub,ParseResults):
431            sub.__parent = wkref(self)
432
433    def __delitem__( self, i ):
434        if isinstance(i,(int,slice)):
435            mylen = len( self.__toklist )
436            del self.__toklist[i]
437
438            # convert int to slice
439            if isinstance(i, int):
440                if i < 0:
441                    i += mylen
442                i = slice(i, i+1)
443            # get removed indices
444            removed = list(range(*i.indices(mylen)))
445            removed.reverse()
446            # fixup indices in token dictionary
447            for name,occurrences in self.__tokdict.items():
448                for j in removed:
449                    for k, (value, position) in enumerate(occurrences):
450                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
451        else:
452            del self.__tokdict[i]
453
454    def __contains__( self, k ):
455        return k in self.__tokdict
456
457    def __len__( self ): return len( self.__toklist )
458    def __bool__(self): return ( not not self.__toklist )
459    __nonzero__ = __bool__
460    def __iter__( self ): return iter( self.__toklist )
461    def __reversed__( self ): return iter( self.__toklist[::-1] )
462    def _iterkeys( self ):
463        if hasattr(self.__tokdict, "iterkeys"):
464            return self.__tokdict.iterkeys()
465        else:
466            return iter(self.__tokdict)
467
468    def _itervalues( self ):
469        return (self[k] for k in self._iterkeys())
470
471    def _iteritems( self ):
472        return ((k, self[k]) for k in self._iterkeys())
473
    # Expose the dict-style accessors under the names conventional for the
    # running Python version: on Python 3, keys/values/items are the iterator
    # helpers themselves; on Python 2 the iterator helpers get the iter* names
    # and keys/values/items return lists built from them.
    if PY_3:
        keys = _iterkeys
        """Returns an iterator of all named result keys (Python 3.x only)."""

        values = _itervalues
        """Returns an iterator of all named result values (Python 3.x only)."""

        items = _iteritems
        """Returns an iterator of all named result key-value tuples (Python 3.x only)."""

    else:
        iterkeys = _iterkeys
        """Returns an iterator of all named result keys (Python 2.x only)."""

        itervalues = _itervalues
        """Returns an iterator of all named result values (Python 2.x only)."""

        iteritems = _iteritems
        """Returns an iterator of all named result key-value tuples (Python 2.x only)."""

        def keys( self ):
            """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iterkeys())

        def values( self ):
            """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.itervalues())

        def items( self ):
            """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iteritems())
505
506    def haskeys( self ):
507        """Since keys() returns an iterator, this method is helpful in bypassing
508           code that looks for the existence of any defined results names."""
509        return bool(self.__tokdict)
510
511    def pop( self, *args, **kwargs):
512        """
513        Removes and returns item at specified index (default=C{last}).
514        Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
515        argument or an integer argument, it will use C{list} semantics
516        and pop tokens from the list of parsed tokens. If passed a
517        non-integer argument (most likely a string), it will use C{dict}
518        semantics and pop the corresponding value from any defined
519        results names. A second default return value argument is
520        supported, just as in C{dict.pop()}.
521
522        Example::
523            def remove_first(tokens):
524                tokens.pop(0)
525            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
526            print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
527
528            label = Word(alphas)
529            patt = label("LABEL") + OneOrMore(Word(nums))
530            print(patt.parseString("AAB 123 321").dump())
531
532            # Use pop() in a parse action to remove named result (note that corresponding value is not
533            # removed from list form of results)
534            def remove_LABEL(tokens):
535                tokens.pop("LABEL")
536                return tokens
537            patt.addParseAction(remove_LABEL)
538            print(patt.parseString("AAB 123 321").dump())
539        prints::
540            ['AAB', '123', '321']
541            - LABEL: AAB
542
543            ['AAB', '123', '321']
544        """
545        if not args:
546            args = [-1]
547        for k,v in kwargs.items():
548            if k == 'default':
549                args = (args[0], v)
550            else:
551                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
552        if (isinstance(args[0], int) or
553                        len(args) == 1 or
554                        args[0] in self):
555            index = args[0]
556            ret = self[index]
557            del self[index]
558            return ret
559        else:
560            defaultvalue = args[1]
561            return defaultvalue
562
563    def get(self, key, defaultValue=None):
564        """
565        Returns named result matching the given key, or if there is no
566        such name, then returns the given C{defaultValue} or C{None} if no
567        C{defaultValue} is specified.
568
569        Similar to C{dict.get()}.
570
571        Example::
572            integer = Word(nums)
573            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
574
575            result = date_str.parseString("1999/12/31")
576            print(result.get("year")) # -> '1999'
577            print(result.get("hour", "not specified")) # -> 'not specified'
578            print(result.get("hour")) # -> None
579        """
580        if key in self:
581            return self[key]
582        else:
583            return defaultValue
584
585    def insert( self, index, insStr ):
586        """
587        Inserts new element at location index in the list of parsed tokens.
588
589        Similar to C{list.insert()}.
590
591        Example::
592            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
593
594            # use a parse action to insert the parse location in the front of the parsed results
595            def insert_locn(locn, tokens):
596                tokens.insert(0, locn)
597            print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
598        """
599        self.__toklist.insert(index, insStr)
600        # fixup indices in token dictionary
601        for name,occurrences in self.__tokdict.items():
602            for k, (value, position) in enumerate(occurrences):
603                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
604
605    def append( self, item ):
606        """
607        Add single element to end of ParseResults list of elements.
608
609        Example::
610            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
611
612            # use a parse action to compute the sum of the parsed integers, and add it to the end
613            def append_sum(tokens):
614                tokens.append(sum(map(int, tokens)))
615            print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
616        """
617        self.__toklist.append(item)
618
619    def extend( self, itemseq ):
620        """
621        Add sequence of elements to end of ParseResults list of elements.
622
623        Example::
624            patt = OneOrMore(Word(alphas))
625
626            # use a parse action to append the reverse of the matched strings, to make a palindrome
627            def make_palindrome(tokens):
628                tokens.extend(reversed([t[::-1] for t in tokens]))
629                return ''.join(tokens)
630            print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
631        """
632        if isinstance(itemseq, ParseResults):
633            self += itemseq
634        else:
635            self.__toklist.extend(itemseq)
636
637    def clear( self ):
638        """
639        Clear all elements and results names.
640        """
641        del self.__toklist[:]
642        self.__tokdict.clear()
643
644    def __getattr__( self, name ):
645        try:
646            return self[name]
647        except KeyError:
648            return ""
649
650        if name in self.__tokdict:
651            if name not in self.__accumNames:
652                return self.__tokdict[name][-1][0]
653            else:
654                return ParseResults([ v[0] for v in self.__tokdict[name] ])
655        else:
656            return ""
657
658    def __add__( self, other ):
659        ret = self.copy()
660        ret += other
661        return ret
662
663    def __iadd__( self, other ):
664        if other.__tokdict:
665            offset = len(self.__toklist)
666            addoffset = lambda a: offset if a<0 else a+offset
667            otheritems = other.__tokdict.items()
668            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
669                                for (k,vlist) in otheritems for v in vlist]
670            for k,v in otherdictitems:
671                self[k] = v
672                if isinstance(v[0],ParseResults):
673                    v[0].__parent = wkref(self)
674
675        self.__toklist += other.__toklist
676        self.__accumNames.update( other.__accumNames )
677        return self
678
679    def __radd__(self, other):
680        if isinstance(other,int) and other == 0:
681            # useful for merging many ParseResults using sum() builtin
682            return self.copy()
683        else:
684            # this may raise a TypeError - so be it
685            return other + self
686
687    def __repr__( self ):
688        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
689
690    def __str__( self ):
691        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
692
693    def _asStringList( self, sep='' ):
694        out = []
695        for item in self.__toklist:
696            if out and sep:
697                out.append(sep)
698            if isinstance( item, ParseResults ):
699                out += item._asStringList()
700            else:
701                out.append( _ustr(item) )
702        return out
703
704    def asList( self ):
705        """
706        Returns the parse results as a nested list of matching tokens, all converted to strings.
707
708        Example::
709            patt = OneOrMore(Word(alphas))
710            result = patt.parseString("sldkj lsdkj sldkj")
711            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
712            print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
713
714            # Use asList() to create an actual list
715            result_list = result.asList()
716            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
717        """
718        return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
719
720    def asDict( self ):
721        """
722        Returns the named parse results as a nested dictionary.
723
724        Example::
725            integer = Word(nums)
726            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
727
728            result = date_str.parseString('12/31/1999')
729            print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
730
731            result_dict = result.asDict()
732            print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
733
734            # even though a ParseResults supports dict-like access, sometime you just need to have a dict
735            import json
736            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
737            print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
738        """
739        if PY_3:
740            item_fn = self.items
741        else:
742            item_fn = self.iteritems
743
744        def toItem(obj):
745            if isinstance(obj, ParseResults):
746                if obj.haskeys():
747                    return obj.asDict()
748                else:
749                    return [toItem(v) for v in obj]
750            else:
751                return obj
752
753        return dict((k,toItem(v)) for k,v in item_fn())
754
755    def copy( self ):
756        """
757        Returns a new copy of a C{ParseResults} object.
758        """
759        ret = ParseResults( self.__toklist )
760        ret.__tokdict = self.__tokdict.copy()
761        ret.__parent = self.__parent
762        ret.__accumNames.update( self.__accumNames )
763        ret.__name = self.__name
764        return ret
765
766    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
767        """
768        (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
769        """
770        nl = "\n"
771        out = []
772        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
773                                                            for v in vlist)
774        nextLevelIndent = indent + "  "
775
776        # collapse out indents if formatting is not desired
777        if not formatted:
778            indent = ""
779            nextLevelIndent = ""
780            nl = ""
781
782        selfTag = None
783        if doctag is not None:
784            selfTag = doctag
785        else:
786            if self.__name:
787                selfTag = self.__name
788
789        if not selfTag:
790            if namedItemsOnly:
791                return ""
792            else:
793                selfTag = "ITEM"
794
795        out += [ nl, indent, "<", selfTag, ">" ]
796
797        for i,res in enumerate(self.__toklist):
798            if isinstance(res,ParseResults):
799                if i in namedItems:
800                    out += [ res.asXML(namedItems[i],
801                                        namedItemsOnly and doctag is None,
802                                        nextLevelIndent,
803                                        formatted)]
804                else:
805                    out += [ res.asXML(None,
806                                        namedItemsOnly and doctag is None,
807                                        nextLevelIndent,
808                                        formatted)]
809            else:
810                # individual token, see if there is a name for it
811                resTag = None
812                if i in namedItems:
813                    resTag = namedItems[i]
814                if not resTag:
815                    if namedItemsOnly:
816                        continue
817                    else:
818                        resTag = "ITEM"
819                xmlBodyText = _xml_escape(_ustr(res))
820                out += [ nl, nextLevelIndent, "<", resTag, ">",
821                                                xmlBodyText,
822                                                "</", resTag, ">" ]
823
824        out += [ nl, indent, "</", selfTag, ">" ]
825        return "".join(out)
826
827    def __lookup(self,sub):
828        for k,vlist in self.__tokdict.items():
829            for v,loc in vlist:
830                if sub is v:
831                    return k
832        return None
833
834    def getName(self):
835        r"""
836        Returns the results name for this token expression. Useful when several
837        different expressions might match at a particular location.
838
839        Example::
840            integer = Word(nums)
841            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
842            house_number_expr = Suppress('#') + Word(nums, alphanums)
843            user_data = (Group(house_number_expr)("house_number")
844                        | Group(ssn_expr)("ssn")
845                        | Group(integer)("age"))
846            user_info = OneOrMore(user_data)
847
848            result = user_info.parseString("22 111-22-3333 #221B")
849            for item in result:
850                print(item.getName(), ':', item[0])
851        prints::
852            age : 22
853            ssn : 111-22-3333
854            house_number : 221B
855        """
856        if self.__name:
857            return self.__name
858        elif self.__parent:
859            par = self.__parent()
860            if par:
861                return par.__lookup(self)
862            else:
863                return None
864        elif (len(self) == 1 and
865               len(self.__tokdict) == 1 and
866               next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
867            return next(iter(self.__tokdict.keys()))
868        else:
869            return None
870
871    def dump(self, indent='', depth=0, full=True):
872        """
873        Diagnostic method for listing out the contents of a C{ParseResults}.
874        Accepts an optional C{indent} argument so that this string can be embedded
875        in a nested display of other data.
876
877        Example::
878            integer = Word(nums)
879            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
880
881            result = date_str.parseString('12/31/1999')
882            print(result.dump())
883        prints::
884            ['12', '/', '31', '/', '1999']
885            - day: 1999
886            - month: 31
887            - year: 12
888        """
889        out = []
890        NL = '\n'
891        out.append( indent+_ustr(self.asList()) )
892        if full:
893            if self.haskeys():
894                items = sorted((str(k), v) for k,v in self.items())
895                for k,v in items:
896                    if out:
897                        out.append(NL)
898                    out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
899                    if isinstance(v,ParseResults):
900                        if v:
901                            out.append( v.dump(indent,depth+1) )
902                        else:
903                            out.append(_ustr(v))
904                    else:
905                        out.append(repr(v))
906            elif any(isinstance(vv,ParseResults) for vv in self):
907                v = self
908                for i,vv in enumerate(v):
909                    if isinstance(vv,ParseResults):
910                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),vv.dump(indent,depth+1) ))
911                    else:
912                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),_ustr(vv)))
913
914        return "".join(out)
915
916    def pprint(self, *args, **kwargs):
917        """
918        Pretty-printer for parsed results as a list, using the C{pprint} module.
919        Accepts additional positional or keyword args as defined for the
920        C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})
921
922        Example::
923            ident = Word(alphas, alphanums)
924            num = Word(nums)
925            func = Forward()
926            term = ident | num | Group('(' + func + ')')
927            func <<= ident + Group(Optional(delimitedList(term)))
928            result = func.parseString("fna a,b,(fnb c,d,200),100")
929            result.pprint(width=40)
930        prints::
931            ['fna',
932             ['a',
933              'b',
934              ['(', 'fnb', ['c', 'd', '200'], ')'],
935              '100']]
936        """
937        pprint.pprint(self.asList(), *args, **kwargs)
938
939    # add support for pickle protocol
940    def __getstate__(self):
941        return ( self.__toklist,
942                 ( self.__tokdict.copy(),
943                   self.__parent is not None and self.__parent() or None,
944                   self.__accumNames,
945                   self.__name ) )
946
947    def __setstate__(self,state):
948        self.__toklist = state[0]
949        (self.__tokdict,
950         par,
951         inAccumNames,
952         self.__name) = state[1]
953        self.__accumNames = {}
954        self.__accumNames.update(inAccumNames)
955        if par is not None:
956            self.__parent = wkref(par)
957        else:
958            self.__parent = None
959
960    def __getnewargs__(self):
961        return self.__toklist, self.__name, self.__asList, self.__modal
962
963    def __dir__(self):
964        return (dir(type(self)) + list(self.keys()))
965
# Register ParseResults with MutableMapping (presumably the collections ABC,
# imported elsewhere in this file) so that it is treated as a virtual subclass
# without inheriting from it.
MutableMapping.register(ParseResults)
967
def col (loc,strg):
    """Return the column of position C{loc} within the string C{strg}, treating
   newlines as line separators. Columns are numbered starting at 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # a position directly after a newline (and inside the string) is column 1
    if 0 < loc < len(strg) and strg[loc-1] == '\n':
        return 1
    # otherwise measure the distance back to the previous newline (-1 if none)
    return loc - strg.rfind("\n", 0, loc)
980
def lineno(loc,strg):
    """Return the line number of position C{loc} within the string C{strg},
   treating newlines as line separators. Lines are numbered starting at 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    newlines_before = strg.count("\n",0,loc)
    return newlines_before + 1
992
def line( loc, strg ):
    """Return the text of the line containing position C{loc} within the string
       C{strg}, treating newlines as line separators. The newline itself is not included.
       """
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    # no following newline: the line runs to the end of the string
    return strg[start:end] if end >= 0 else strg[start:]
1002
def _defaultStartDebugAction( instring, loc, expr ):
    # Default debug action run before a match attempt: prints the expression,
    # the location, and its (line,column) position.
    message = "".join(["Match ", _ustr(expr), " at loc ", _ustr(loc),
                       "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )])
    print (message)
1005
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    # Default debug action run after a successful match: prints the
    # expression and the matched tokens as a list.
    message = "".join(["Matched ", _ustr(expr), " -> ", str(toks.asList())])
    print (message)
1008
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    # Default debug action run when a match attempt raises: prints the exception.
    message = "".join(["Exception raised:", _ustr(exc)])
    print (message)
1011
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.
    Accepts any positional arguments and ignores them."""
    return None
1015
1016# Only works on Python 3.x - nonlocal is toxic to Python 2 installs
1017#~ 'decorator to trim function calls to match the arity of the target'
1018#~ def _trim_arity(func, maxargs=3):
1019    #~ if func in singleArgBuiltins:
1020        #~ return lambda s,l,t: func(t)
1021    #~ limit = 0
1022    #~ foundArity = False
1023    #~ def wrapper(*args):
1024        #~ nonlocal limit,foundArity
1025        #~ while 1:
1026            #~ try:
1027                #~ ret = func(*args[limit:])
1028                #~ foundArity = True
1029                #~ return ret
1030            #~ except TypeError:
1031                #~ if limit == maxargs or foundArity:
1032                    #~ raise
1033                #~ limit += 1
1034                #~ continue
1035    #~ return wrapper
1036
1037# this version is Python 2.x-3.x cross-compatible
1038'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """Wrap C{func} so it can be called with the full parse-action argument list
    even if it accepts fewer arguments.

    The returned wrapper calls C{func} with progressively fewer leading
    arguments (dropping up to C{maxargs}+1 of them) until a call does not fail
    with an arity-related C{TypeError}; the number of dropped arguments is then
    remembered for later calls. C{TypeError}s that originate inside C{func}
    itself are re-raised rather than treated as an arity mismatch.

    Callables found in C{singleArgBuiltins} are short-circuited to be called
    with the tokens argument only.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists are used as mutable cells, since this Python 2
    # compatible version cannot use 'nonlocal' in the nested function
    limit = [0]          # number of leading args currently being dropped
    foundArity = [False] # set once a call to func has completed

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # compare the (file, line) of the deepest of the first two
                        # traceback frames with the synthesized location of the call
                        # to func above; a mismatch means the error was raised
                        # somewhere else, i.e. inside func
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the traceback reference before continuing
                        del tb

                # arity mismatch: drop one more leading argument and retry,
                # giving up once more than maxargs have been dropped
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
1102
class ParserElement(object):
    """Abstract base level parser element class."""
    # whitespace characters copied into each new element's whiteChars by
    # __init__; changed via setDefaultWhitespaceChars()
    DEFAULT_WHITE_CHARS = " \n\t\r"
    # NOTE(review): not referenced in this part of the file - presumably
    # controls how much traceback is kept on parse errors; confirm at use site
    verbose_stacktrace = False
1107
1108    @staticmethod
1109    def setDefaultWhitespaceChars( chars ):
1110        r"""
1111        Overrides the default whitespace chars
1112
1113        Example::
1114            # default whitespace chars are space, <TAB> and newline
1115            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']
1116
1117            # change to just treat newline as significant
1118            ParserElement.setDefaultWhitespaceChars(" \t")
1119            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
1120        """
1121        ParserElement.DEFAULT_WHITE_CHARS = chars
1122
1123    @staticmethod
1124    def inlineLiteralsUsing(cls):
1125        """
1126        Set class to be used for inclusion of string literals into a parser.
1127
1128        Example::
1129            # default literal class used is Literal
1130            integer = Word(nums)
1131            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1132
1133            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
1134
1135
1136            # change to Suppress
1137            ParserElement.inlineLiteralsUsing(Suppress)
1138            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1139
1140            date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
1141        """
1142        ParserElement._literalStringClass = cls
1143
1144    def __init__( self, savelist=False ):
1145        self.parseAction = list()
1146        self.failAction = None
1147        #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
1148        self.strRepr = None
1149        self.resultsName = None
1150        self.saveAsList = savelist
1151        self.skipWhitespace = True
1152        self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1153        self.copyDefaultWhiteChars = True
1154        self.mayReturnEmpty = False # used when checking for left-recursion
1155        self.keepTabs = False
1156        self.ignoreExprs = list()
1157        self.debug = False
1158        self.streamlined = False
1159        self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
1160        self.errmsg = ""
1161        self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
1162        self.debugActions = ( None, None, None ) #custom debug actions
1163        self.re = None
1164        self.callPreparse = True # used to avoid redundant calls to preParse
1165        self.callDuringTry = False
1166
1167    def copy( self ):
1168        """
1169        Make a copy of this C{ParserElement}.  Useful for defining different parse actions
1170        for the same parsing pattern, using copies of the original parse element.
1171
1172        Example::
1173            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1174            integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
1175            integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
1176
1177            print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
1178        prints::
1179            [5120, 100, 655360, 268435456]
1180        Equivalent form of C{expr.copy()} is just C{expr()}::
1181            integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
1182        """
1183        cpy = copy.copy( self )
1184        cpy.parseAction = self.parseAction[:]
1185        cpy.ignoreExprs = self.ignoreExprs[:]
1186        if self.copyDefaultWhiteChars:
1187            cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1188        return cpy
1189
1190    def setName( self, name ):
1191        """
1192        Define name for this expression, makes debugging and exception messages clearer.
1193
1194        Example::
1195            Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
1196            Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
1197        """
1198        self.name = name
1199        self.errmsg = "Expected " + self.name
1200        if hasattr(self,"exception"):
1201            self.exception.msg = self.errmsg
1202        return self
1203
1204    def setResultsName( self, name, listAllMatches=False ):
1205        """
1206        Define name for referencing matching tokens as a nested attribute
1207        of the returned parse results.
1208        NOTE: this returns a *copy* of the original C{ParserElement} object;
1209        this is so that the client can define a basic element, such as an
1210        integer, and reference it in multiple places with different names.
1211
1212        You can also set results names using the abbreviated syntax,
1213        C{expr("name")} in place of C{expr.setResultsName("name")} -
1214        see L{I{__call__}<__call__>}.
1215
1216        Example::
1217            date_str = (integer.setResultsName("year") + '/'
1218                        + integer.setResultsName("month") + '/'
1219                        + integer.setResultsName("day"))
1220
1221            # equivalent form:
1222            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1223        """
1224        newself = self.copy()
1225        if name.endswith("*"):
1226            name = name[:-1]
1227            listAllMatches=True
1228        newself.resultsName = name
1229        newself.modalResults = not listAllMatches
1230        return newself
1231
1232    def setBreak(self,breakFlag = True):
1233        """Method to invoke the Python pdb debugger when this element is
1234           about to be parsed. Set C{breakFlag} to True to enable, False to
1235           disable.
1236        """
1237        if breakFlag:
1238            _parseMethod = self._parse
1239            def breaker(instring, loc, doActions=True, callPreParse=True):
1240                import pdb
1241                pdb.set_trace()
1242                return _parseMethod( instring, loc, doActions, callPreParse )
1243            breaker._originalParseMethod = _parseMethod
1244            self._parse = breaker
1245        else:
1246            if hasattr(self._parse,"_originalParseMethod"):
1247                self._parse = self._parse._originalParseMethod
1248        return self
1249
1250    def setParseAction( self, *fns, **kwargs ):
1251        """
1252        Define one or more actions to perform when successfully matching parse element definition.
1253        Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
1254        C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
1255         - s   = the original string being parsed (see note below)
1256         - loc = the location of the matching substring
1257         - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
1258        If the functions in fns modify the tokens, they can return them as the return
1259        value from fn, and the modified list of tokens will replace the original.
1260        Otherwise, fn does not need to return any value.
1261
1262        Optional keyword arguments:
1263         - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing
1264
1265        Note: the default parsing behavior is to expand tabs in the input string
1266        before starting the parsing process.  See L{I{parseString}<parseString>} for more information
1267        on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
1268        consistent view of the parsed string, the parse location, and line and column
1269        positions within the parsed string.
1270
1271        Example::
1272            integer = Word(nums)
1273            date_str = integer + '/' + integer + '/' + integer
1274
1275            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
1276
1277            # use parse action to convert to ints at parse time
1278            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1279            date_str = integer + '/' + integer + '/' + integer
1280
1281            # note that integer fields are now ints, not strings
1282            date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
1283        """
1284        self.parseAction = list(map(_trim_arity, list(fns)))
1285        self.callDuringTry = kwargs.get("callDuringTry", False)
1286        return self
1287
1288    def addParseAction( self, *fns, **kwargs ):
1289        """
1290        Add one or more parse actions to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.
1291
1292        See examples in L{I{copy}<copy>}.
1293        """
1294        self.parseAction += list(map(_trim_arity, list(fns)))
1295        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
1296        return self
1297
1298    def addCondition(self, *fns, **kwargs):
1299        """Add a boolean predicate function to expression's list of parse actions. See
1300        L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
1301        functions passed to C{addCondition} need to return boolean success/fail of the condition.
1302
1303        Optional keyword arguments:
1304         - message = define a custom message to be used in the raised exception
1305         - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
1306
1307        Example::
1308            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1309            year_int = integer.copy()
1310            year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
1311            date_str = year_int + '/' + integer + '/' + integer
1312
1313            result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
1314        """
1315        msg = kwargs.get("message", "failed user-defined condition")
1316        exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException
1317        for fn in fns:
1318            def pa(s,l,t):
1319                if not bool(_trim_arity(fn)(s,l,t)):
1320                    raise exc_type(s,l,msg)
1321            self.parseAction.append(pa)
1322        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
1323        return self
1324
1325    def setFailAction( self, fn ):
1326        """Define action to perform if parsing fails at this expression.
1327           Fail acton fn is a callable function that takes the arguments
1328           C{fn(s,loc,expr,err)} where:
1329            - s = string being parsed
1330            - loc = location where expression match was attempted and failed
1331            - expr = the parse expression that failed
1332            - err = the exception thrown
1333           The function returns no value.  It may throw C{L{ParseFatalException}}
1334           if it is desired to stop parsing immediately."""
1335        self.failAction = fn
1336        return self
1337
1338    def _skipIgnorables( self, instring, loc ):
1339        exprsFound = True
1340        while exprsFound:
1341            exprsFound = False
1342            for e in self.ignoreExprs:
1343                try:
1344                    while 1:
1345                        loc,dummy = e._parse( instring, loc )
1346                        exprsFound = True
1347                except ParseException:
1348                    pass
1349        return loc
1350
1351    def preParse( self, instring, loc ):
1352        if self.ignoreExprs:
1353            loc = self._skipIgnorables( instring, loc )
1354
1355        if self.skipWhitespace:
1356            wt = self.whiteChars
1357            instrlen = len(instring)
1358            while loc < instrlen and instring[loc] in wt:
1359                loc += 1
1360
1361        return loc
1362
1363    def parseImpl( self, instring, loc, doActions=True ):
1364        return loc, []
1365
1366    def postParse( self, instring, loc, tokenlist ):
1367        return tokenlist
1368
1369    #~ @profile
    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
        # Core (uncached) parse routine for this element.
        #
        # Runs preParse (when requested by both callPreParse and
        # self.callPreparse), then parseImpl and postParse, wraps the tokens
        # in a ParseResults, and runs the parse actions when doActions or
        # self.callDuringTry is set.
        #
        # Returns a tuple (loc, retTokens) with the location after the match
        # and the resulting ParseResults.
        # An IndexError from parseImpl is converted to a ParseException
        # located at the end of the input string.
        debugging = ( self.debug ) #and doActions )

        # slower path: only taken when debug or fail actions may need to be called
        if debugging or self.failAction:
            #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
            if (self.debugActions[0] ):
                self.debugActions[0]( instring, loc, self )
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            tokensStart = preloc
            try:
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            except ParseBaseException as err:
                #~ print ("Exception raised:", err)
                # notify the exception debug action and the fail action, then propagate
                if self.debugActions[2]:
                    self.debugActions[2]( instring, tokensStart, self, err )
                if self.failAction:
                    self.failAction( instring, tokensStart, self, err )
                raise
        else:
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            tokensStart = preloc
            # the IndexError guard is only set up if the element may raise one,
            # or if we are already at/after the end of the input
            if self.mayIndexError or preloc >= len(instring):
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            else:
                loc,tokens = self.parseImpl( instring, preloc, doActions )

        tokens = self.postParse( instring, loc, tokens )

        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
        if self.parseAction and (doActions or self.callDuringTry):
            if debugging:
                try:
                    for fn in self.parseAction:
                        tokens = fn( instring, tokensStart, retTokens )
                        # a parse action returning a value replaces the current results
                        if tokens is not None:
                            retTokens = ParseResults( tokens,
                                                      self.resultsName,
                                                      asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                      modal=self.modalResults )
                except ParseBaseException as err:
                    #~ print "Exception raised in user parse action:", err
                    if (self.debugActions[2] ):
                        self.debugActions[2]( instring, tokensStart, self, err )
                    raise
            else:
                # same loop as above, without the debug exception hook
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
        if debugging:
            #~ print ("Matched",self,"->",retTokens.asList())
            if (self.debugActions[1] ):
                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

        return loc, retTokens
1440
1441    def tryParse( self, instring, loc ):
1442        try:
1443            return self._parse( instring, loc, doActions=False )[0]
1444        except ParseFatalException:
1445            raise ParseException( instring, loc, self.errmsg, self)
1446
1447    def canParseNext(self, instring, loc):
1448        try:
1449            self.tryParse(instring, loc)
1450        except (ParseException, IndexError):
1451            return False
1452        else:
1453            return True
1454
1455    class _UnboundedCache(object):
1456        def __init__(self):
1457            cache = {}
1458            self.not_in_cache = not_in_cache = object()
1459
1460            def get(self, key):
1461                return cache.get(key, not_in_cache)
1462
1463            def set(self, key, value):
1464                cache[key] = value
1465
1466            def clear(self):
1467                cache.clear()
1468
1469            def cache_len(self):
1470                return len(cache)
1471
1472            self.get = types.MethodType(get, self)
1473            self.set = types.MethodType(set, self)
1474            self.clear = types.MethodType(clear, self)
1475            self.__len__ = types.MethodType(cache_len, self)
1476
1477    if _OrderedDict is not None:
1478        class _FifoCache(object):
1479            def __init__(self, size):
1480                self.not_in_cache = not_in_cache = object()
1481
1482                cache = _OrderedDict()
1483
1484                def get(self, key):
1485                    return cache.get(key, not_in_cache)
1486
1487                def set(self, key, value):
1488                    cache[key] = value
1489                    while len(cache) > size:
1490                        try:
1491                            cache.popitem(False)
1492                        except KeyError:
1493                            pass
1494
1495                def clear(self):
1496                    cache.clear()
1497
1498                def cache_len(self):
1499                    return len(cache)
1500
1501                self.get = types.MethodType(get, self)
1502                self.set = types.MethodType(set, self)
1503                self.clear = types.MethodType(clear, self)
1504                self.__len__ = types.MethodType(cache_len, self)
1505
1506    else:
1507        class _FifoCache(object):
1508            def __init__(self, size):
1509                self.not_in_cache = not_in_cache = object()
1510
1511                cache = {}
1512                key_fifo = collections.deque([], size)
1513
1514                def get(self, key):
1515                    return cache.get(key, not_in_cache)
1516
1517                def set(self, key, value):
1518                    cache[key] = value
1519                    while len(key_fifo) > size:
1520                        cache.pop(key_fifo.popleft(), None)
1521                    key_fifo.append(key)
1522
1523                def clear(self):
1524                    cache.clear()
1525                    key_fifo.clear()
1526
1527                def cache_len(self):
1528                    return len(cache)
1529
1530                self.get = types.MethodType(get, self)
1531                self.set = types.MethodType(set, self)
1532                self.clear = types.MethodType(clear, self)
1533                self.__len__ = types.MethodType(cache_len, self)
1534
    # argument cache for optimizing repeated calls when backtracking through recursive expressions
    packrat_cache = {} # placeholder; replaced by enablePackrat(). Defined here so that resetCache() doesn't fail
    # lock held by _parseCache for the whole lookup / parse / store sequence
    packrat_cache_lock = RLock()
    # [hits, misses] counters, indexed by the HIT/MISS constants in _parseCache
    packrat_cache_stats = [0, 0]
1539
1540    # this method gets repeatedly called during backtracking with the same arguments -
1541    # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
1542    def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
1543        HIT, MISS = 0, 1
1544        lookup = (self, instring, loc, callPreParse, doActions)
1545        with ParserElement.packrat_cache_lock:
1546            cache = ParserElement.packrat_cache
1547            value = cache.get(lookup)
1548            if value is cache.not_in_cache:
1549                ParserElement.packrat_cache_stats[MISS] += 1
1550                try:
1551                    value = self._parseNoCache(instring, loc, doActions, callPreParse)
1552                except ParseBaseException as pe:
1553                    # cache a copy of the exception, without the traceback
1554                    cache.set(lookup, pe.__class__(*pe.args))
1555                    raise
1556                else:
1557                    cache.set(lookup, (value[0], value[1].copy()))
1558                    return value
1559            else:
1560                ParserElement.packrat_cache_stats[HIT] += 1
1561                if isinstance(value, Exception):
1562                    raise value
1563                return (value[0], value[1].copy())
1564
    # default parse entry point is the uncached one; enablePackrat() rebinds this to _parseCache
    _parse = _parseNoCache
1566
1567    @staticmethod
1568    def resetCache():
1569        ParserElement.packrat_cache.clear()
1570        ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)
1571
    # set to True by the first enablePackrat() call; later calls are then no-ops
    _packratEnabled = False
1573    @staticmethod
1574    def enablePackrat(cache_size_limit=128):
1575        """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1576           Repeated parse attempts at the same string location (which happens
1577           often in many complex grammars) can immediately return a cached value,
1578           instead of re-executing parsing/validating code.  Memoizing is done of
1579           both valid results and parsing exceptions.
1580
1581           Parameters:
1582            - cache_size_limit - (default=C{128}) - if an integer value is provided
1583              will limit the size of the packrat cache; if None is passed, then
1584              the cache size will be unbounded; if 0 is passed, the cache will
1585              be effectively disabled.
1586
1587           This speedup may break existing programs that use parse actions that
1588           have side-effects.  For this reason, packrat parsing is disabled when
1589           you first import pyparsing.  To activate the packrat feature, your
1590           program must call the class method C{ParserElement.enablePackrat()}.  If
1591           your program uses C{psyco} to "compile as you go", you must call
1592           C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
1593           Python will crash.  For best results, call C{enablePackrat()} immediately
1594           after importing pyparsing.
1595
1596           Example::
1597               import pyparsing
1598               pyparsing.ParserElement.enablePackrat()
1599        """
1600        if not ParserElement._packratEnabled:
1601            ParserElement._packratEnabled = True
1602            if cache_size_limit is None:
1603                ParserElement.packrat_cache = ParserElement._UnboundedCache()
1604            else:
1605                ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
1606            ParserElement._parse = ParserElement._parseCache
1607
1608    def parseString( self, instring, parseAll=False ):
1609        """
1610        Execute the parse expression with the given string.
1611        This is the main interface to the client code, once the complete
1612        expression has been built.
1613
1614        If you want the grammar to require that the entire input string be
1615        successfully parsed, then set C{parseAll} to True (equivalent to ending
1616        the grammar with C{L{StringEnd()}}).
1617
1618        Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
1619        in order to report proper column numbers in parse actions.
1620        If the input string contains tabs and
1621        the grammar uses parse actions that use the C{loc} argument to index into the
1622        string being parsed, you can ensure you have a consistent view of the input
1623        string by:
1624         - calling C{parseWithTabs} on your grammar before calling C{parseString}
1625           (see L{I{parseWithTabs}<parseWithTabs>})
1626         - define your parse action using the full C{(s,loc,toks)} signature, and
1627           reference the input string using the parse action's C{s} argument
1628         - explictly expand the tabs in your input string before calling
1629           C{parseString}
1630
1631        Example::
1632            Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
1633            Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
1634        """
1635        ParserElement.resetCache()
1636        if not self.streamlined:
1637            self.streamline()
1638            #~ self.saveAsList = True
1639        for e in self.ignoreExprs:
1640            e.streamline()
1641        if not self.keepTabs:
1642            instring = instring.expandtabs()
1643        try:
1644            loc, tokens = self._parse( instring, 0 )
1645            if parseAll:
1646                loc = self.preParse( instring, loc )
1647                se = Empty() + StringEnd()
1648                se._parse( instring, loc )
1649        except ParseBaseException as exc:
1650            if ParserElement.verbose_stacktrace:
1651                raise
1652            else:
1653                # catch and re-raise exception from here, clears out pyparsing internal stack trace
1654                raise exc
1655        else:
1656            return tokens
1657
1658    def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
1659        """
1660        Scan the input string for expression matches.  Each match will return the
1661        matching tokens, start location, and end location.  May be called with optional
1662        C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
1663        C{overlap} is specified, then overlapping matches will be reported.
1664
1665        Note that the start and end locations are reported relative to the string
1666        being parsed.  See L{I{parseString}<parseString>} for more information on parsing
1667        strings with embedded tabs.
1668
1669        Example::
1670            source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
1671            print(source)
1672            for tokens,start,end in Word(alphas).scanString(source):
1673                print(' '*start + '^'*(end-start))
1674                print(' '*start + tokens[0])
1675
1676        prints::
1677
1678            sldjf123lsdjjkf345sldkjf879lkjsfd987
1679            ^^^^^
1680            sldjf
1681                    ^^^^^^^
1682                    lsdjjkf
1683                              ^^^^^^
1684                              sldkjf
1685                                       ^^^^^^
1686                                       lkjsfd
1687        """
1688        if not self.streamlined:
1689            self.streamline()
1690        for e in self.ignoreExprs:
1691            e.streamline()
1692
1693        if not self.keepTabs:
1694            instring = _ustr(instring).expandtabs()
1695        instrlen = len(instring)
1696        loc = 0
1697        preparseFn = self.preParse
1698        parseFn = self._parse
1699        ParserElement.resetCache()
1700        matches = 0
1701        try:
1702            while loc <= instrlen and matches < maxMatches:
1703                try:
1704                    preloc = preparseFn( instring, loc )
1705                    nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
1706                except ParseException:
1707                    loc = preloc+1
1708                else:
1709                    if nextLoc > loc:
1710                        matches += 1
1711                        yield tokens, preloc, nextLoc
1712                        if overlap:
1713                            nextloc = preparseFn( instring, loc )
1714                            if nextloc > loc:
1715                                loc = nextLoc
1716                            else:
1717                                loc += 1
1718                        else:
1719                            loc = nextLoc
1720                    else:
1721                        loc = preloc+1
1722        except ParseBaseException as exc:
1723            if ParserElement.verbose_stacktrace:
1724                raise
1725            else:
1726                # catch and re-raise exception from here, clears out pyparsing internal stack trace
1727                raise exc
1728
1729    def transformString( self, instring ):
1730        """
1731        Extension to C{L{scanString}}, to modify matching text with modified tokens that may
1732        be returned from a parse action.  To use C{transformString}, define a grammar and
1733        attach a parse action to it that modifies the returned token list.
1734        Invoking C{transformString()} on a target string will then scan for matches,
1735        and replace the matched text patterns according to the logic in the parse
1736        action.  C{transformString()} returns the resulting transformed string.
1737
1738        Example::
1739            wd = Word(alphas)
1740            wd.setParseAction(lambda toks: toks[0].title())
1741
1742            print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
1743        Prints::
1744            Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
1745        """
1746        out = []
1747        lastE = 0
1748        # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
1749        # keep string locs straight between transformString and scanString
1750        self.keepTabs = True
1751        try:
1752            for t,s,e in self.scanString( instring ):
1753                out.append( instring[lastE:s] )
1754                if t:
1755                    if isinstance(t,ParseResults):
1756                        out += t.asList()
1757                    elif isinstance(t,list):
1758                        out += t
1759                    else:
1760                        out.append(t)
1761                lastE = e
1762            out.append(instring[lastE:])
1763            out = [o for o in out if o]
1764            return "".join(map(_ustr,_flatten(out)))
1765        except ParseBaseException as exc:
1766            if ParserElement.verbose_stacktrace:
1767                raise
1768            else:
1769                # catch and re-raise exception from here, clears out pyparsing internal stack trace
1770                raise exc
1771
1772    def searchString( self, instring, maxMatches=_MAX_INT ):
1773        """
1774        Another extension to C{L{scanString}}, simplifying the access to the tokens found
1775        to match the given parse expression.  May be called with optional
1776        C{maxMatches} argument, to clip searching after 'n' matches are found.
1777
1778        Example::
1779            # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
1780            cap_word = Word(alphas.upper(), alphas.lower())
1781
1782            print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
1783
1784            # the sum() builtin can be used to merge results into a single ParseResults object
1785            print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
1786        prints::
1787            [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
1788            ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
1789        """
1790        try:
1791            return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1792        except ParseBaseException as exc:
1793            if ParserElement.verbose_stacktrace:
1794                raise
1795            else:
1796                # catch and re-raise exception from here, clears out pyparsing internal stack trace
1797                raise exc
1798
1799    def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
1800        """
1801        Generator method to split a string using the given expression as a separator.
1802        May be called with optional C{maxsplit} argument, to limit the number of splits;
1803        and the optional C{includeSeparators} argument (default=C{False}), if the separating
1804        matching text should be included in the split results.
1805
1806        Example::
1807            punc = oneOf(list(".,;:/-!?"))
1808            print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
1809        prints::
1810            ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
1811        """
1812        splits = 0
1813        last = 0
1814        for t,s,e in self.scanString(instring, maxMatches=maxsplit):
1815            yield instring[last:s]
1816            if includeSeparators:
1817                yield t[0]
1818            last = e
1819        yield instring[last:]
1820
1821    def __add__(self, other ):
1822        """
1823        Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement
1824        converts them to L{Literal}s by default.
1825
1826        Example::
1827            greet = Word(alphas) + "," + Word(alphas) + "!"
1828            hello = "Hello, World!"
1829            print (hello, "->", greet.parseString(hello))
1830        Prints::
1831            Hello, World! -> ['Hello', ',', 'World', '!']
1832        """
1833        if isinstance( other, basestring ):
1834            other = ParserElement._literalStringClass( other )
1835        if not isinstance( other, ParserElement ):
1836            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1837                    SyntaxWarning, stacklevel=2)
1838            return None
1839        return And( [ self, other ] )
1840
1841    def __radd__(self, other ):
1842        """
1843        Implementation of + operator when left operand is not a C{L{ParserElement}}
1844        """
1845        if isinstance( other, basestring ):
1846            other = ParserElement._literalStringClass( other )
1847        if not isinstance( other, ParserElement ):
1848            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1849                    SyntaxWarning, stacklevel=2)
1850            return None
1851        return other + self
1852
1853    def __sub__(self, other):
1854        """
1855        Implementation of - operator, returns C{L{And}} with error stop
1856        """
1857        if isinstance( other, basestring ):
1858            other = ParserElement._literalStringClass( other )
1859        if not isinstance( other, ParserElement ):
1860            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1861                    SyntaxWarning, stacklevel=2)
1862            return None
1863        return self + And._ErrorStop() + other
1864
1865    def __rsub__(self, other ):
1866        """
1867        Implementation of - operator when left operand is not a C{L{ParserElement}}
1868        """
1869        if isinstance( other, basestring ):
1870            other = ParserElement._literalStringClass( other )
1871        if not isinstance( other, ParserElement ):
1872            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1873                    SyntaxWarning, stacklevel=2)
1874            return None
1875        return other - self
1876
1877    def __mul__(self,other):
1878        """
1879        Implementation of * operator, allows use of C{expr * 3} in place of
1880        C{expr + expr + expr}.  Expressions may also me multiplied by a 2-integer
1881        tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
1882        may also include C{None} as in:
1883         - C{expr*(n,None)} or C{expr*(n,)} is equivalent
1884              to C{expr*n + L{ZeroOrMore}(expr)}
1885              (read as "at least n instances of C{expr}")
1886         - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
1887              (read as "0 to n instances of C{expr}")
1888         - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
1889         - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
1890
1891        Note that C{expr*(None,n)} does not raise an exception if
1892        more than n exprs exist in the input stream; that is,
1893        C{expr*(None,n)} does not enforce a maximum number of expr
1894        occurrences.  If this behavior is desired, then write
1895        C{expr*(None,n) + ~expr}
1896        """
1897        if isinstance(other,int):
1898            minElements, optElements = other,0
1899        elif isinstance(other,tuple):
1900            other = (other + (None, None))[:2]
1901            if other[0] is None:
1902                other = (0, other[1])
1903            if isinstance(other[0],int) and other[1] is None:
1904                if other[0] == 0:
1905                    return ZeroOrMore(self)
1906                if other[0] == 1:
1907                    return OneOrMore(self)
1908                else:
1909                    return self*other[0] + ZeroOrMore(self)
1910            elif isinstance(other[0],int) and isinstance(other[1],int):
1911                minElements, optElements = other
1912                optElements -= minElements
1913            else:
1914                raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1915        else:
1916            raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1917
1918        if minElements < 0:
1919            raise ValueError("cannot multiply ParserElement by negative value")
1920        if optElements < 0:
1921            raise ValueError("second tuple value must be greater or equal to first tuple value")
1922        if minElements == optElements == 0:
1923            raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1924
1925        if (optElements):
1926            def makeOptionalList(n):
1927                if n>1:
1928                    return Optional(self + makeOptionalList(n-1))
1929                else:
1930                    return Optional(self)
1931            if minElements:
1932                if minElements == 1:
1933                    ret = self + makeOptionalList(optElements)
1934                else:
1935                    ret = And([self]*minElements) + makeOptionalList(optElements)
1936            else:
1937                ret = makeOptionalList(optElements)
1938        else:
1939            if minElements == 1:
1940                ret = self
1941            else:
1942                ret = And([self]*minElements)
1943        return ret
1944
1945    def __rmul__(self, other):
1946        return self.__mul__(other)
1947
1948    def __or__(self, other ):
1949        """
1950        Implementation of | operator - returns C{L{MatchFirst}}
1951        """
1952        if isinstance( other, basestring ):
1953            other = ParserElement._literalStringClass( other )
1954        if not isinstance( other, ParserElement ):
1955            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1956                    SyntaxWarning, stacklevel=2)
1957            return None
1958        return MatchFirst( [ self, other ] )
1959
1960    def __ror__(self, other ):
1961        """
1962        Implementation of | operator when left operand is not a C{L{ParserElement}}
1963        """
1964        if isinstance( other, basestring ):
1965            other = ParserElement._literalStringClass( other )
1966        if not isinstance( other, ParserElement ):
1967            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1968                    SyntaxWarning, stacklevel=2)
1969            return None
1970        return other | self
1971
1972    def __xor__(self, other ):
1973        """
1974        Implementation of ^ operator - returns C{L{Or}}
1975        """
1976        if isinstance( other, basestring ):
1977            other = ParserElement._literalStringClass( other )
1978        if not isinstance( other, ParserElement ):
1979            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1980                    SyntaxWarning, stacklevel=2)
1981            return None
1982        return Or( [ self, other ] )
1983
1984    def __rxor__(self, other ):
1985        """
1986        Implementation of ^ operator when left operand is not a C{L{ParserElement}}
1987        """
1988        if isinstance( other, basestring ):
1989            other = ParserElement._literalStringClass( other )
1990        if not isinstance( other, ParserElement ):
1991            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1992                    SyntaxWarning, stacklevel=2)
1993            return None
1994        return other ^ self
1995
1996    def __and__(self, other ):
1997        """
1998        Implementation of & operator - returns C{L{Each}}
1999        """
2000        if isinstance( other, basestring ):
2001            other = ParserElement._literalStringClass( other )
2002        if not isinstance( other, ParserElement ):
2003            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2004                    SyntaxWarning, stacklevel=2)
2005            return None
2006        return Each( [ self, other ] )
2007
2008    def __rand__(self, other ):
2009        """
2010        Implementation of & operator when left operand is not a C{L{ParserElement}}
2011        """
2012        if isinstance( other, basestring ):
2013            other = ParserElement._literalStringClass( other )
2014        if not isinstance( other, ParserElement ):
2015            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
2016                    SyntaxWarning, stacklevel=2)
2017            return None
2018        return other & self
2019
2020    def __invert__( self ):
2021        """
2022        Implementation of ~ operator - returns C{L{NotAny}}
2023        """
2024        return NotAny( self )
2025
2026    def __call__(self, name=None):
2027        """
2028        Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.
2029
2030        If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
2031        passed as C{True}.
2032
2033        If C{name} is omitted, same as calling C{L{copy}}.
2034
2035        Example::
2036            # these are equivalent
2037            userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
2038            userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
2039        """
2040        if name is not None:
2041            return self.setResultsName(name)
2042        else:
2043            return self.copy()
2044
2045    def suppress( self ):
2046        """
2047        Suppresses the output of this C{ParserElement}; useful to keep punctuation from
2048        cluttering up returned output.
2049        """
2050        return Suppress( self )
2051
2052    def leaveWhitespace( self ):
2053        """
2054        Disables the skipping of whitespace before matching the characters in the
2055        C{ParserElement}'s defined pattern.  This is normally only used internally by
2056        the pyparsing module, but may be needed in some whitespace-sensitive grammars.
2057        """
2058        self.skipWhitespace = False
2059        return self
2060
2061    def setWhitespaceChars( self, chars ):
2062        """
2063        Overrides the default whitespace chars
2064        """
2065        self.skipWhitespace = True
2066        self.whiteChars = chars
2067        self.copyDefaultWhiteChars = False
2068        return self
2069
2070    def parseWithTabs( self ):
2071        """
2072        Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
2073        Must be called before C{parseString} when the input grammar contains elements that
2074        match C{<TAB>} characters.
2075        """
2076        self.keepTabs = True
2077        return self
2078
2079    def ignore( self, other ):
2080        """
2081        Define expression to be ignored (e.g., comments) while doing pattern
2082        matching; may be called repeatedly, to define multiple comment or other
2083        ignorable patterns.
2084
2085        Example::
2086            patt = OneOrMore(Word(alphas))
2087            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
2088
2089            patt.ignore(cStyleComment)
2090            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
2091        """
2092        if isinstance(other, basestring):
2093            other = Suppress(other)
2094
2095        if isinstance( other, Suppress ):
2096            if other not in self.ignoreExprs:
2097                self.ignoreExprs.append(other)
2098        else:
2099            self.ignoreExprs.append( Suppress( other.copy() ) )
2100        return self
2101
2102    def setDebugActions( self, startAction, successAction, exceptionAction ):
2103        """
2104        Enable display of debugging messages while doing pattern matching.
2105        """
2106        self.debugActions = (startAction or _defaultStartDebugAction,
2107                             successAction or _defaultSuccessDebugAction,
2108                             exceptionAction or _defaultExceptionDebugAction)
2109        self.debug = True
2110        return self
2111
2112    def setDebug( self, flag=True ):
2113        """
2114        Enable display of debugging messages while doing pattern matching.
2115        Set C{flag} to True to enable, False to disable.
2116
2117        Example::
2118            wd = Word(alphas).setName("alphaword")
2119            integer = Word(nums).setName("numword")
2120            term = wd | integer
2121
2122            # turn on debugging for wd
2123            wd.setDebug()
2124
2125            OneOrMore(term).parseString("abc 123 xyz 890")
2126
2127        prints::
2128            Match alphaword at loc 0(1,1)
2129            Matched alphaword -> ['abc']
2130            Match alphaword at loc 3(1,4)
2131            Exception raised:Expected alphaword (at char 4), (line:1, col:5)
2132            Match alphaword at loc 7(1,8)
2133            Matched alphaword -> ['xyz']
2134            Match alphaword at loc 11(1,12)
2135            Exception raised:Expected alphaword (at char 12), (line:1, col:13)
2136            Match alphaword at loc 15(1,16)
2137            Exception raised:Expected alphaword (at char 15), (line:1, col:16)
2138
2139        The output shown is that produced by the default debug actions - custom debug actions can be
2140        specified using L{setDebugActions}. Prior to attempting
2141        to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
2142        is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
2143        message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
2144        which makes debugging and exception messages easier to understand - for instance, the default
2145        name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
2146        """
2147        if flag:
2148            self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
2149        else:
2150            self.debug = False
2151        return self
2152
2153    def __str__( self ):
2154        return self.name
2155
2156    def __repr__( self ):
2157        return _ustr(self)
2158
2159    def streamline( self ):
2160        self.streamlined = True
2161        self.strRepr = None
2162        return self
2163
2164    def checkRecursion( self, parseElementList ):
2165        pass
2166
2167    def validate( self, validateTrace=[] ):
2168        """
2169        Check defined expressions for valid structure, check for infinite recursive definitions.
2170        """
2171        self.checkRecursion( [] )
2172
2173    def parseFile( self, file_or_filename, parseAll=False ):
2174        """
2175        Execute the parse expression on the given file or filename.
2176        If a filename is specified (instead of a file object),
2177        the entire file is opened, read, and closed before parsing.
2178        """
2179        try:
2180            file_contents = file_or_filename.read()
2181        except AttributeError:
2182            with open(file_or_filename, "r") as f:
2183                file_contents = f.read()
2184        try:
2185            return self.parseString(file_contents, parseAll)
2186        except ParseBaseException as exc:
2187            if ParserElement.verbose_stacktrace:
2188                raise
2189            else:
2190                # catch and re-raise exception from here, clears out pyparsing internal stack trace
2191                raise exc
2192
2193    def __eq__(self,other):
2194        if isinstance(other, ParserElement):
2195            return self is other or vars(self) == vars(other)
2196        elif isinstance(other, basestring):
2197            return self.matches(other)
2198        else:
2199            return super(ParserElement,self)==other
2200
2201    def __ne__(self,other):
2202        return not (self == other)
2203
2204    def __hash__(self):
2205        return hash(id(self))
2206
2207    def __req__(self,other):
2208        return self == other
2209
2210    def __rne__(self,other):
2211        return not (self == other)
2212
2213    def matches(self, testString, parseAll=True):
2214        """
2215        Method for quick testing of a parser against a test string. Good for simple
2216        inline microtests of sub expressions while building up larger parser.
2217
2218        Parameters:
2219         - testString - to test against this expression for a match
2220         - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
2221
2222        Example::
2223            expr = Word(nums)
2224            assert expr.matches("100")
2225        """
2226        try:
2227            self.parseString(_ustr(testString), parseAll=parseAll)
2228            return True
2229        except ParseBaseException:
2230            return False
2231
2232    def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
2233        """
2234        Execute the parse expression on a series of test strings, showing each
2235        test, the parsed results or where the parse failed. Quick and easy way to
2236        run a parse expression against a list of sample strings.
2237
2238        Parameters:
2239         - tests - a list of separate test strings, or a multiline string of test strings
2240         - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
2241         - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
2242              string; pass None to disable comment filtering
2243         - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
2244              if False, only dump nested list
2245         - printResults - (default=C{True}) prints test output to stdout
2246         - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing
2247
2248        Returns: a (success, results) tuple, where success indicates that all tests succeeded
2249        (or failed if C{failureTests} is True), and the results contain a list of lines of each
2250        test's output
2251
2252        Example::
2253            number_expr = pyparsing_common.number.copy()
2254
2255            result = number_expr.runTests('''
2256                # unsigned integer
2257                100
2258                # negative integer
2259                -100
2260                # float with scientific notation
2261                6.02e23
2262                # integer with scientific notation
2263                1e-12
2264                ''')
2265            print("Success" if result[0] else "Failed!")
2266
2267            result = number_expr.runTests('''
2268                # stray character
2269                100Z
2270                # missing leading digit before '.'
2271                -.100
2272                # too many '.'
2273                3.14.159
2274                ''', failureTests=True)
2275            print("Success" if result[0] else "Failed!")
2276        prints::
2277            # unsigned integer
2278            100
2279            [100]
2280
2281            # negative integer
2282            -100
2283            [-100]
2284
2285            # float with scientific notation
2286            6.02e23
2287            [6.02e+23]
2288
2289            # integer with scientific notation
2290            1e-12
2291            [1e-12]
2292
2293            Success
2294
2295            # stray character
2296            100Z
2297               ^
2298            FAIL: Expected end of text (at char 3), (line:1, col:4)
2299
2300            # missing leading digit before '.'
2301            -.100
2302            ^
2303            FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)
2304
2305            # too many '.'
2306            3.14.159
2307                ^
2308            FAIL: Expected end of text (at char 4), (line:1, col:5)
2309
2310            Success
2311
2312        Each test string must be on a single line. If you want to test a string that spans multiple
2313        lines, create a test like this::
2314
2315            expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines")
2316
2317        (Note that this is a raw string literal, you must include the leading 'r'.)
2318        """
2319        if isinstance(tests, basestring):
2320            tests = list(map(str.strip, tests.rstrip().splitlines()))
2321        if isinstance(comment, basestring):
2322            comment = Literal(comment)
2323        allResults = []
2324        comments = []
2325        success = True
2326        for t in tests:
2327            if comment is not None and comment.matches(t, False) or comments and not t:
2328                comments.append(t)
2329                continue
2330            if not t:
2331                continue
2332            out = ['\n'.join(comments), t]
2333            comments = []
2334            try:
2335                t = t.replace(r'\n','\n')
2336                result = self.parseString(t, parseAll=parseAll)
2337                out.append(result.dump(full=fullDump))
2338                success = success and not failureTests
2339            except ParseBaseException as pe:
2340                fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
2341                if '\n' in t:
2342                    out.append(line(pe.loc, t))
2343                    out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
2344                else:
2345                    out.append(' '*pe.loc + '^' + fatal)
2346                out.append("FAIL: " + str(pe))
2347                success = success and failureTests
2348                result = pe
2349            except Exception as exc:
2350                out.append("FAIL-EXCEPTION: " + str(exc))
2351                success = success and failureTests
2352                result = exc
2353
2354            if printResults:
2355                if fullDump:
2356                    out.append('')
2357                print('\n'.join(out))
2358
2359            allResults.append((t, result))
2360
2361        return success, allResults
2362
2363
class Token(ParserElement):
    """
    Abstract base class, derived from C{ParserElement}, for atomic matching patterns.
    """
    def __init__(self):
        # tokens initialise the base element with savelist switched off
        super(Token, self).__init__(savelist=False)
2370
2371
class Empty(Token):
    """
    The empty token: an expression that always matches.
    """
    def __init__(self):
        super(Empty, self).__init__()
        self.name = "Empty"
        # flagged as able to match without returning text; index-error handling is switched off
        self.mayReturnEmpty, self.mayIndexError = True, False
2381
2382
class NoMatch(Token):
    """
    A token that can never match: parsing it always raises C{ParseException}.
    """
    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.mayReturnEmpty, self.mayIndexError = True, False
        self.errmsg = "Unmatchable token"

    def parseImpl(self, instring, loc, doActions=True):
        # unconditional failure at the current location
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
2396
2397
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::
        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use L{CaselessLiteral}.

    For keyword matching (force word break before and after the matched string),
    use L{Keyword} or L{CaselessKeyword}.

    Passing an empty string issues a C{SyntaxWarning} and turns the new object
    into an L{Empty} instance.
    """
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            # cached so parseImpl can compare the first character cheaply
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        # An empty literal has just been converted to Empty, so it must carry
        # Empty's flag value; only a non-empty literal always consumes text.
        self.mayReturnEmpty = self.matchLen == 0
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match the literal at C{loc}; return C{(new location, matched string)}
        or raise C{ParseException} if the text there differs.
        """
        if (instring[loc] == self.firstMatchChar and
            (self.matchLen==1 or instring.startswith(self.match,loc)) ):
            return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)
# short module-level alias for the Literal class
_L = Literal
# store Literal in ParserElement's _literalStringClass class attribute
ParserElement._literalStringClass = Literal
2438
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is, it must not be
    immediately preceded or followed by a keyword (identifier) character.  Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
          defaulting to all alphanumerics + "_" and "$"
     - C{caseless} allows case-insensitive matching, default is C{False}.

    Example::
        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use L{CaselessKeyword}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            # cached so parseImpl can compare the first character cheaply
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are done in upper case on both sides
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match the keyword at C{loc}, requiring that the characters just before
        and just after it (if any) are not in C{identChars}.  Returns
        C{(new location, keyword string)} or raises C{ParseException}.
        """
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """
        Return a copy of this keyword that keeps the same identifier
        characters as the original, held in its own set.
        """
        c = super(Keyword,self).copy()
        # Preserve this instance's identChars (custom characters, and the
        # upper-cased form used when caseless) instead of resetting them to
        # the class-wide default.
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
2503
class CaselessLiteral(Literal):
    """
    Token that matches a given string without regard to letter case.
    Note: the text returned for a match is always the string given to the
    constructor, in its original case, NOT the text found in the input.

    Example::
        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for L{CaselessKeyword}.)
    """
    def __init__(self, matchString):
        # the Literal base stores the upper-cased text used for comparisons
        super(CaselessLiteral, self).__init__(matchString.upper())
        # keep the literal exactly as it was defined; this is what gets returned
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
2526
class CaselessKeyword(Keyword):
    """
    Caseless version of L{Keyword}.

    Example::
        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for L{CaselessLiteral}.)
    """
    def __init__( self, matchString, identChars=None ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    # parseImpl is inherited from Keyword: with caseless=True it performs the
    # case-insensitive comparison and checks the characters both before and
    # after the keyword, so a match is not accepted directly after an
    # identifier character.
2544
class CloseMatch(Token):
    """
    A variation on L{Literal} which matches "close" matches, that is,
    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
     - C{match_string} - string to be matched
     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match

    The results from a successful parse will contain the matched text from the input string and the following named results:
     - C{mismatches} - a list of the positions within the match_string where mismatches were found
     - C{original} - the original match_string used to compare against the input string

    If C{mismatches} is an empty list, then the match was an exact match.

    Example::
        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """
    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch,self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Compare C{match_string} character by character with the input starting
        at C{loc}.  Returns C{(end location, results)} when no more than
        C{maxMismatches} characters differ; raises C{ParseException} when there
        are too many mismatches or too little input left.
        """
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        if maxloc <= len(instring):
            mismatches = []
            maxMismatches = self.maxMismatches

            for match_stringloc, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
                if src != mat:
                    mismatches.append(match_stringloc)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # The end of the match must be expressed as a position in
                # instring (start + length), not as an offset within
                # match_string, or matches that do not begin at position 0
                # report the wrong text and end location.
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
2604
2605
class Word(Token):
    """
    Token that matches a word built from permitted sets of characters.
    A C{Word} is defined by a string of the characters allowed in first
    position, an optional string of the characters allowed in the rest of the
    word (when omitted, the initial character set is used), and optional
    minimum, maximum and/or exact lengths.  C{min} defaults to 1 (values < 1
    are rejected); C{max} and C{exact} default to 0, which means no maximum
    or exact length restriction. The optional C{excludeChars} parameter lists
    characters to be removed from the given character sets; useful to define
    a word of all printables except for one or two characters, for instance.

    L{srange} is useful for defining custom character set strings for defining
    C{Word} expressions, using range notation from regular expression character sets.

    A common mistake is to use C{Word} to match a specific literal string, as in
    C{Word("Address")}. Remember that C{Word} uses the string argument to define
    I{sets} of matchable characters. This expression would match "Add", "AAA",
    "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
    To match an exact literal string, use L{Literal} or L{Keyword}.

    pyparsing includes helper strings for building Words:
     - L{alphas}
     - L{nums}
     - L{alphanums}
     - L{hexnums}
     - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
     - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
     - L{printables} (any non-whitespace character)

    Example::
        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums+'-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, excludeChars=",")
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
        super(Word,self).__init__()
        if excludeChars:
            # filter the excluded characters out of both character sets
            initChars = ''.join([ch for ch in initChars if ch not in excludeChars])
            if bodyChars:
                bodyChars = ''.join([ch for ch in bodyChars if ch not in excludeChars])
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        # without explicit body characters, the body uses the initial set
        effectiveBody = bodyChars if bodyChars else initChars
        self.bodyCharsOrig = effectiveBody
        self.bodyChars = set(effectiveBody)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        if exact > 0:
            # an exact length overrides both bounds
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # With default length limits and no space among the characters, the
        # word is also expressed as a compiled regular expression for parseImpl.
        noSpaces = ' ' not in self.initCharsOrig+self.bodyCharsOrig
        if noSpaces and (min, max, exact) == (1, 0, 0):
            if self.bodyCharsOrig == self.initCharsOrig:
                pattern = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                pattern = "%s[%s]*" % (re.escape(self.initCharsOrig),
                                       _escapeRegexRangeChars(self.bodyCharsOrig))
            else:
                pattern = "[%s][%s]*" % (_escapeRegexRangeChars(self.initCharsOrig),
                                         _escapeRegexRangeChars(self.bodyCharsOrig))
            if self.asKeyword:
                pattern = r"\b" + pattern + r"\b"
            self.reString = pattern
            try:
                self.re = re.compile(self.reString)
            except Exception:
                # fall back to the character-by-character scan in parseImpl
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        if self.re:
            found = self.re.match(instring, loc)
            if not found:
                raise ParseException(instring, loc, self.errmsg, self)
            return found.end(), found.group()

        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = min(start + self.maxLen, instrlen)
        # consume body characters up to the maximum length or end of input
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        tooShort = loc - start < self.minLen
        # with an explicit max, a word that keeps going past it is rejected
        overrunsMax = self.maxSpecified and loc < instrlen and instring[loc] in bodychars
        # in keyword mode the word may not touch body characters on either side
        touchesBodyChars = self.asKeyword and (
            (start>0 and instring[start-1] in bodychars) or
            (loc<instrlen and instring[loc] in bodychars) )

        if tooShort or overrunsMax or touchesBodyChars:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        try:
            return super(Word,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:

            def abbreviate(chars):
                # show at most four characters of a character set
                return chars[:4]+"..." if len(chars)>4 else chars

            if self.initCharsOrig == self.bodyCharsOrig:
                self.strRepr = "W:(%s)" % abbreviate(self.initCharsOrig)
            else:
                self.strRepr = "W:(%s,%s)" % ( abbreviate(self.initCharsOrig), abbreviate(self.bodyCharsOrig) )

        return self.strRepr
2763
2764
class Regex(Token):
    r"""
    Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
    named parse results.

    Example::
        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """
        The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is.
        See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already compiled RE object, which is then used directly.
        Raises C{ValueError} for any other type of C{pattern}; an invalid pattern string
        issues a C{SyntaxWarning} and re-raises the error from C{re.compile()}.
        """
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # Record the regex source text; str() of a compiled pattern gives
            # the repr of the pattern object rather than the expression.
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Apply the compiled expression at C{loc}.  Returns C{(end of match, results)},
        where the results hold the matched text plus one named result per
        named group; raises C{ParseException} when there is no match.
        """
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        # copy the named groups of the match into named results
        for groupName, groupText in result.groupdict().items():
            ret[groupName] = groupText
        return loc,ret

    def __str__( self ):
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
2836
2837
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=C{None})
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
        - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
        - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    Leading and trailing whitespace is stripped from C{quoteChar} and C{endQuoteChar}; if either
    is empty after stripping, a C{SyntaxWarning} is issued and C{SyntaxError} is raised.

    Example::
        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
    prints::
        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        # the closing delimiter defaults to the opening one
        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        # first character of the opening quote, used by parseImpl as a quick pre-check
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # The pattern starts with the opening quote followed by a non-capturing
        # group of alternatives for the body.  The first alternative is any
        # single character other than the first end-quote character and the
        # escape character (and, unless multiline, other than \n and \r).
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        # For a multi-character end quote, also accept each proper prefix of
        # the end quote when it is followed by a character other than the next
        # end-quote character (longest prefix first).
        if len(self.endQuoteChar) > 1:
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
                )
        # alternative: the escaped-quote sequence
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        # alternative: the escape character followed by any one character
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            # used by parseImpl to replace "escChar + X" with "X"
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        # close the group, allow any number of repetitions, then require the end quote
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # Returns (end of match, matched text); the text is unquoted and
        # unescaped when unquoteResults is set.  Raises ParseException when no
        # quoted string starts at loc.
        # The regex is only tried when the first character matches the opening quote.
        result = instring[loc] == self.firstQuoteChar and self.re.match(instring,loc) or None
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped whitespace
                if '\\' in ret and self.convertWhitespaceEscapes:
                    # two-character backslash sequences mapped to the whitespace character they denote
                    ws_map = {
                        r'\t' : '\t',
                        r'\n' : '\n',
                        r'\f' : '\f',
                        r'\r' : '\r',
                    }
                    for wslit,wschar in ws_map.items():
                        ret = ret.replace(wslit, wschar)

                # replace escaped characters
                if self.escChar:
                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        # Prefer the name reported by the base class; if that lookup raises,
        # fall back to a description built (and cached in strRepr) below.
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
2973
2974
class CharsNotIn(Token):
    """
    Token that matches a run of characters, I{none} of which appear in a given
    exclusion set.  Whitespace is matched like any other character unless it is
    listed in the exclusion set (see example).  Construct with a string holding
    the disallowed characters, plus optional C{min}, C{max} and/or C{exact}
    length limits.  C{min} defaults to 1 and may not be less than 1; C{max} and
    C{exact} default to 0, meaning no maximum and no exact length restriction.
    A positive C{exact} overrides both C{min} and C{max}.

    Example::
        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
    prints::
        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        # leading whitespace is candidate content here, so it must not be skipped
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        if exact > 0:
            # an exact length pins both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Consume characters from C{loc} until an excluded character, the maximum
        length, or the end of the input is reached.  Returns C{(new_loc, matched_text)};
        raises C{ParseException} if the first character is excluded or fewer than
        C{minLen} characters were matched.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        # furthest position the match may reach
        limit = min( begin+self.maxLen, len(instring) )
        pos = begin + 1
        while pos < limit and instring[pos] not in excluded:
            pos += 1

        if pos - begin < self.minLen:
            raise ParseException(instring, pos, self.errmsg, self)

        return pos, instring[begin:pos]

    def __str__( self ):
        """
        Return the string from the inherited C{__str__}; if that raises, return a
        cached C{"!W:(...)"} description showing at most the first four excluded
        characters.
        """
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            shown = self.notChars
            if len(shown) > 4:
                self.strRepr = "!W:(%s...)" % shown[:4]
            else:
                self.strRepr = "!W:(%s)" % shown

        return self.strRepr
3045
class White(Token):
    """
    Matching class for significant whitespace.  Pyparsing grammars normally skip
    over whitespace; use this class where a whitespace structure is part of the
    grammar.  Construct with a string of the whitespace characters to match
    (default is C{" \\t\\r\\n"}).  Optional C{min}, C{max}, and C{exact} arguments
    limit the length of the matched run, as defined for the C{L{Word}} class.
    """
    # display names used to build the expression's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # the characters being matched must not also be skipped as leading whitespace
        skippable = "".join(c for c in self.whiteChars if c not in self.matchWhite)
        self.setWhitespaceChars( skippable )
        self.name = "".join(White.whiteStrs[c] for c in self.matchWhite)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        if exact > 0:
            # an exact length pins both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Consume a run of C{matchWhite} characters starting at C{loc}.  Returns
        C{(new_loc, matched_text)}; raises C{ParseException} if the first character
        is not in C{matchWhite} or the run is shorter than C{minLen}.
        """
        wanted = self.matchWhite
        if instring[ loc ] not in wanted:
            raise ParseException(instring, loc, self.errmsg, self)

        begin = loc
        limit = min( begin + self.maxLen, len(instring) )
        pos = begin + 1
        while pos < limit and instring[pos] in wanted:
            pos += 1

        if pos - begin < self.minLen:
            raise ParseException(instring, pos, self.errmsg, self)

        return pos, instring[begin:pos]
3095
3096
class _PositionToken(Token):
    """
    Shared base class for the position-testing tokens defined after it
    (C{GoToColumn}, C{LineStart}, C{LineEnd}, C{StringStart}, C{StringEnd},
    C{WordStart}, C{WordEnd}).  Names the instance after its concrete class and
    sets C{mayReturnEmpty} to True and C{mayIndexError} to False.
    """
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name=self.__class__.__name__
3103
class GoToColumn(_PositionToken):
    """
    Token to advance to a specific column of input text; useful for tabular report scraping.
    """
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        # target column, compared against the value returned by the module-level col() helper
        self.col = colno

    def preParse( self, instring, loc ):
        """
        Advance C{loc} over ignorable expressions and whitespace, stopping as soon
        as the target column is reached, a non-space character is found, or the
        input ends.  Returns the (possibly unchanged) location.
        """
        target = self.col
        if col(loc,instring) == target:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        end = len(instring)
        while loc < end and instring[loc].isspace() and col( loc, instring ) != target:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return C{(new_loc, text)} where C{text} is the input between C{loc} and the
        target column.  Raises C{ParseException} if C{loc} is already past that column.
        """
        here = col( loc, instring )
        if here > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        stop = loc + self.col - here
        return stop, instring[ loc: stop ]
3128
3129
class LineStart(_PositionToken):
    """
    Matches if current position is at the beginning of a line within the parse string

    Example::

        test = '''\\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    Prints::
        ['AAA', ' this line']
        ['AAA', ' and this line']

    """
    def __init__( self ):
        super(LineStart,self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Succeed with no tokens when the module-level C{col()} helper reports
        column 1 for C{loc}; otherwise raise C{ParseException}.
        """
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3159
class LineEnd(_PositionToken):
    """
    Matches if current position is at the end of a line within the parse string
    """
    def __init__( self ):
        super(LineEnd,self).__init__()
        # the newline is what this token matches, so it must not be skipped as whitespace
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match a newline character at C{loc} (returned as the token), or the end of
        the input (no tokens).  In both cases the returned location is C{loc+1}.
        Raises C{ParseException} for any other character or a location past the end.
        """
        end = len(instring)
        if loc == end:
            return loc+1, []
        if loc < end and instring[loc] == "\n":
            return loc+1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
3179
class StringStart(_PositionToken):
    """
    Matches if current position is at the beginning of the parse string
    """
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Succeed with no tokens at location 0, or at the location that C{preParse}
        returns when started from 0 (i.e. only skippable text precedes C{loc}).
        Raises C{ParseException} otherwise.
        """
        if loc != 0 and loc != self.preParse( instring, 0 ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3194
class StringEnd(_PositionToken):
    """
    Matches if current position is at the end of the parse string
    """
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Succeed with no tokens when C{loc} is at or beyond the end of C{instring}.

        Exactly at the end, the returned location is C{loc+1}; once already past
        the end, the location is returned unchanged.  Raises C{ParseException}
        when input remains at C{loc}.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc+1, []
        # loc > end: the three comparisons are exhaustive, so no further branch is needed
        return loc, []
3212
class WordStart(_PositionToken):
    """
    Matches if the current position is at the beginning of a Word, and
    is not preceded by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
    the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        # a set gives constant-time membership tests in parseImpl
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """
        Succeed with no tokens at location 0, or when the previous character is
        not a word character and the current character is one.  Raises
        C{ParseException} otherwise.
        """
        if loc == 0:
            return loc, []

        chars = self.wordChars
        if instring[loc-1] in chars or instring[loc] not in chars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3232
class WordEnd(_PositionToken):
    """
    Matches if the current position is at the end of a Word, and
    is not followed by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
    the string being parsed, or at the end of a line.

    Position 0 of a non-empty string is never the end of a word, since no
    character precedes it.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        # a set gives constant-time membership tests in parseImpl
        self.wordChars = set(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """
        Succeed with no tokens when C{loc} is at or past the end of the input, or
        when the previous character is a word character and the current one is
        not.  Raises C{ParseException} otherwise.
        """
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            # loc == 0 is tested explicitly: instring[loc-1] would otherwise wrap
            # around to the last character of the input
            if (instring[loc] in self.wordChars or
                loc == 0 or
                instring[loc-1] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3254
3255
class ParseExpression(ParserElement):
    """
    Abstract subclass of ParserElement, for combining and post-processing parsed tokens.

    The contained expressions are kept in the list C{self.exprs}.
    """
    def __init__( self, exprs, savelist = False ):
        """
        Store C{exprs} as the list C{self.exprs}.

        C{exprs} may be a single string, a generator or other iterable, or a
        single non-iterable object.  Every bare string, whether passed alone or
        as an element of an iterable (including iterables that mix strings with
        expression objects), is wrapped with C{ParserElement._literalStringClass}.
        """
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ ParserElement._literalStringClass( exprs ) ]
        elif isinstance( exprs, Iterable ):
            # wrap each string individually, so a list mixing strings and expressions
            # is converted just like an all-string list
            literal = ParserElement._literalStringClass
            self.exprs = [ literal( e ) if isinstance( e, basestring ) else e
                           for e in exprs ]
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                # not iterable at all: treat as a single contained expression
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the contained expression(s) at index or slice C{i}."""
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions, discard the cached string
        representation, and return C{self}."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies, so the caller's original sub-expressions are left untouched
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as an ignorable expression on this expression and on
        every contained expression; returns C{self}.  A C{Suppress} instance that
        is already in C{self.ignoreExprs} is not registered again.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self

        super( ParseExpression, self).ignore( other )
        # pass on the entry that the base class just appended
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """
        Return the string from the inherited C{__str__}; if that raises, return a
        cached C{"ClassName:(exprs)"} description.
        """
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """
        Streamline all contained expressions, then flatten a nested expression of
        the same class into this one where that is safe; returns C{self}.
        """
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        def mergeable( other ):
            # a nested expression may be absorbed only if nothing is attached to it
            # that would be lost: parse actions, a results name, or debugging
            return ( isinstance( other, self.__class__ ) and
                     not(other.parseAction) and
                     other.resultsName is None and
                     not other.debug )

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if mergeable( other ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            # re-read the last element: the list may just have been rebuilt above
            other = self.exprs[-1]
            if mergeable( other ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the inherited C{setResultsName} and return its result."""
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=[] ):
        """
        Validate every contained expression, passing along the trace extended
        with C{self}, then run this expression's recursion check.
        """
        # the shared default list is never mutated: a copy is extended here
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy of this expression whose contained expressions are
        themselves copies."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
3367
class And(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'+'} operator.
    May also be constructed using the C{'-'} operator, which will suppress backtracking.

    An C{And} may be created with no expressions (for example to be filled in
    later with C{+=}); while it is empty it matches without consuming any input.

    Example::
        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"),name_expr("name"),integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        # marker element: once parseImpl has passed one of these, a failure in any
        # later expression is raised as ParseSyntaxException
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling follows the first expression; with no expressions the
        # base-class settings are kept (Or and MatchFirst guard the empty case too)
        if self.exprs:
            first = self.exprs[0]
            self.setWhitespaceChars( first.whiteChars )
            self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Parse every contained expression in order starting at C{loc}, and return
        C{(new_loc, tokens)} with the accumulated tokens.

        An exception raised by a contained expression propagates unchanged,
        except after an C{_ErrorStop} marker, where C{ParseBaseException} and
        C{IndexError} are re-raised as C{ParseSyntaxException}.
        """
        if not self.exprs:
            # nothing is required, so the match succeeds without consuming input
            return loc, []

        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    # drop the traceback reference before converting the exception
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            # keep results that carry tokens or named results
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Implement C{+=}: append C{other} (a string is first wrapped as a
        literal) to this expression in place."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on contained expressions, stopping after the
        first one that cannot match empty."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return C{self.name} if set, else a cached C{"{a b c}"} style description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr
3442
3443
class Or(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the C{'^'} operator.

    Example::
        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))
    prints::
        [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        super(Or,self).__init__(exprs, savelist)
        # with no alternatives at all, the expression is flagged as possibly empty
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Trial-parse every alternative at C{loc}, then fully parse the successful
        ones from longest to shortest trial match and return the first full parse
        that succeeds.  If none succeeds, raise the recorded exception with the
        greatest location, with its message replaced by this expression's C{errmsg}.
        """
        furthestErr = None
        furthestLoc = -1
        candidates = []
        for alt in self.exprs:
            try:
                endloc = alt.tryParse( instring, loc )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestErr, furthestLoc = err, err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)
            else:
                # remember every successful alternative with the location it reached
                candidates.append((endloc, alt))

        # retry longest to shortest; the sort is stable, so ties keep definition order
        for _, alt in sorted(candidates, key=lambda m: m[0], reverse=True):
            try:
                return alt._parse( instring, loc, doActions )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestErr, furthestLoc = err, err.loc

        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr


    def __ixor__(self, other ):
        """Implement C{^=}: append C{other} (a string is first wrapped as a
        literal) to this expression in place."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        """Return C{self.name} if set, else a cached C{"{a ^ b}"} style description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " ^ ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( trail )
3521
3522
class MatchFirst(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the C{'|'} operator.

    Example::
        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        super(MatchFirst,self).__init__(exprs, savelist)
        # with no alternatives at all, the expression is flagged as possibly empty
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Parse the alternatives in order at C{loc} and return the result of the
        first one that succeeds.  If none succeeds, raise the recorded exception
        with the greatest location, with its message replaced by this
        expression's C{errmsg}.
        """
        furthestErr = None
        furthestLoc = -1
        for alt in self.exprs:
            try:
                return alt._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestErr, furthestLoc = err, err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ior__(self, other ):
        """Implement C{|=}: append C{other} (a string is first wrapped as a
        literal) to this expression in place."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return C{self.name} if set, else a cached C{"{a | b}"} style description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " | ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( trail )
3589
3590
class Each(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found, but in any order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'&'} operator.

    Example::
        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )
    prints::
        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # the expression groups used by parseImpl are built lazily on its first call
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match the contained expressions in whatever order they occur at C{loc}.

        Works in two phases: a trial phase using C{tryParse} to discover the
        order in which expressions match, then a second phase that calls
        C{_parse} on the expressions in that order, starting again from the
        original C{loc}, and sums the results.  Returns C{(new_loc, results)};
        raises C{ParseException} if any required expression was never matched.
        """
        if self.initExprGroups:
            # one-time classification of self.exprs; the groups are cached on the instance
            # maps id of the wrapped expression -> its Optional wrapper
            self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
            # expressions wrapped by Optional
            opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
            # non-Optional expressions that are flagged as able to match empty
            opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
            self.optionals = opt1 + opt2
            # expressions wrapped by ZeroOrMore / OneOrMore
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False
        tmpLoc = loc
        # per-call working copies; matched entries are removed from these below
        tmpReqd = self.required[:]
        tmpOpt  = self.optionals[:]
        matchOrder = []

        keepMatching = True
        while keepMatching:
            # multioptionals and multirequired are offered on every pass and are never
            # removed, so they may match repeatedly
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    # trial parse; on success continue from the location it returns
                    tmpLoc = e.tryParse( instring, tmpLoc )
                except ParseException:
                    failed.append(e)
                else:
                    # record the Optional wrapper when there is one, else the expression itself
                    matchOrder.append(self.opt1map.get(id(e),e))
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            # stop once a complete pass produced no match at all
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join(_ustr(e) for e in tmpReqd)
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]

        # second phase: parse again from the original loc, in the discovered order
        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        # combine the per-expression results, starting from an empty ParseResults
        finalResults = sum(resultlist, ParseResults([]))
        return loc, finalResults

    def __str__( self ):
        """Return C{self.name} if set, else a cached C{"{a & b}"} style description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every contained expression."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
3713
3714
class ParseElementEnhance(ParserElement):
    """
    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

    Wraps a single contained expression, stored as C{self.expr}.  The contained
    expression may be C{None} (for example a L{Forward} that has not been
    assigned yet), so every method that touches it checks for C{None} first.
    """
    def __init__( self, expr, savelist=False ):
        """
        Parameters:
         - expr - the expression to wrap; a string is converted to a literal
              expression via C{ParserElement._literalStringClass}; may be C{None}
         - savelist - (default=C{False}) passed through to the C{ParserElement} constructor
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            if issubclass(ParserElement._literalStringClass, Token):
                expr = ParserElement._literalStringClass(expr)
            else:
                # the configured literal class is not a Token, so give it a Literal to wrap
                expr = ParserElement._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # mirror the parsing characteristics of the contained expression
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Delegate parsing to the contained expression.

        Raises C{ParseException} if no expression has been assigned.
        """
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """
        Disable whitespace skipping on this element and on a private copy of
        the contained expression.  Returns C{self}.
        """
        self.skipWhitespace = False
        # Guard before dereferencing: the original code called self.expr.copy()
        # ahead of its own None check, which raised AttributeError for an
        # element with no contained expression.
        if self.expr is not None:
            # work on a copy so other users of the same expression object are unaffected
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as an ignorable expression on this element and on
        the contained expression.  A C{Suppress} that is already registered is
        not added a second time.  Returns C{self}.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            # NOTE(review): presumably the base class appended the new entry,
            # so the last element is what gets forwarded - confirm against ParserElement.ignore
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element, then the contained expression.  Returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """
        Raise C{RecursiveGrammarException} if this element already appears in
        C{parseElementList}; otherwise continue the check in the contained
        expression with this element appended to a copy of the list.
        """
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """
        Validate the contained expression, then run the recursion check
        starting from this element.
        """
        # the default list is only ever copied, never mutated
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """
        Return the base-class string if it can be produced; otherwise a cached
        C{"ClassName:(expr)"} form (C{None} if there is no contained
        expression and nothing has been cached).
        """
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # NOTE(review): presumably raised when no name has been assigned - fall back below
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
3790
3791
class FollowedBy(ParseElementEnhance):
    """
    Positive lookahead for the given parse expression.  C{FollowedBy}
    does I{not} advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  C{FollowedBy} always returns a null token list.

    Example::
        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
    prints::
        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__( self, expr ):
        super(FollowedBy, self).__init__(expr)
        # a lookahead consumes nothing, so it may "match" without producing tokens
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # tryParse raises if the lookahead expression does not match here;
        # its return value is deliberately discarded so the position is unchanged
        self.expr.tryParse(instring, loc)
        no_tokens = []
        return loc, no_tokens
3816
3817
class NotAny(ParseElementEnhance):
    """
    Negative lookahead for the given parse expression.  C{NotAny}
    does I{not} advance the parsing position within the input string, it only
    verifies that the specified parse expression does I{not} match at the current
    position.  Also, C{NotAny} does I{not} skip over leading whitespace. C{NotAny}
    always returns a null token list.  May be constructed using the '~' operator.

    Example::
        # accept a word, unless the text at this position begins with 'end'
        not_end_word = ~Literal('end') + Word(alphas)
        print(not_end_word.parseString('begin'))  # -> ['begin']
        # not_end_word.parseString('ending') raises ParseException
    """
    def __init__( self, expr ):
        super(NotAny, self).__init__(expr)
        # Only this element stops skipping whitespace.  leaveWhitespace() is
        # deliberately NOT used, because it would propagate to the contained expression.
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        unwanted_found = self.expr.canParseNext(instring, loc)
        if unwanted_found:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def __str__( self ):
        if hasattr(self, "name"):
            return self.name

        cached = self.strRepr
        if cached is None:
            cached = "~{" + _ustr(self.expr) + "}"
            self.strRepr = cached

        return cached
3849
class _MultipleMatch(ParseElementEnhance):
    """
    Shared implementation of repetition: matches the contained expression one
    or more times, optionally stopping when a C{stopOn} sentinel is seen.
    """
    def __init__( self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = ParserElement._literalStringClass(sentinel)
        # store the negated sentinel: it "parses" only where the sentinel is absent
        if sentinel is None:
            self.not_ender = None
        else:
            self.not_ender = ~sentinel

    def parseImpl( self, instring, loc, doActions=True ):
        # bind frequently used callables to locals for the repetition loop
        parse_once = self.expr._parse
        skip_ignorables = self._skipIgnorables
        reject_sentinel = None
        if self.not_ender is not None:
            reject_sentinel = self.not_ender.tryParse

        # There must be at least one match, but first make sure we are not
        # sitting on the stopOn sentinel; if we are, this raises and we fail.
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_once( instring, loc, doActions, callPreParse=False )
        try:
            skip_needed = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                preloc = skip_ignorables( instring, loc ) if skip_needed else loc
                loc, more = parse_once( instring, preloc, doActions )
                if more or more.haskeys():
                    tokens += more
        except (ParseException,IndexError):
            # the first failed repetition (or a sentinel hit) ends the loop;
            # loc still holds the position after the last successful match
            pass

        return loc, tokens
3887
class OneOrMore(_MultipleMatch):
    """
    Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__( self ):
        if hasattr(self, "name"):
            return self.name

        cached = self.strRepr
        if cached is None:
            # rendered as "{expr}..." and cached for later calls
            cached = "{" + _ustr(self.expr) + "}..."
            self.strRepr = cached

        return cached
3922
class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to L{OneOrMore}
    """
    def __init__( self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        # zero repetitions is a valid match
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            outcome = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException,IndexError):
            # not even one repetition matched: succeed with no tokens, position unchanged
            outcome = loc, []
        return outcome

    def __str__( self ):
        if hasattr(self, "name"):
            return self.name

        cached = self.strRepr
        if cached is None:
            cached = "[" + _ustr(self.expr) + "]..."
            self.strRepr = cached

        return cached
3953
class _NullToken(object):
    """Placeholder object that is always falsy and renders as an empty string."""

    def __bool__(self):
        return False

    # Python 2 spelling of the truth-value hook
    __nonzero__ = __bool__

    def __str__(self):
        return ""


# sentinel default for Optional, compared by identity ("is") to detect "no default given"
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
     - expr - expression that may match at this position (zero or one time)
     - default (optional) - value to be returned if the optional expression is not found.

    Example::
        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')
    prints::
        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    def __init__( self, expr, default=_optionalNotMatched ):
        super(Optional, self).__init__( expr, savelist=False )
        self.saveAsList = self.expr.saveAsList
        # _optionalNotMatched is a private sentinel, so None stays usable as a real default
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                # no default configured: succeed with no tokens
                tokens = []
            elif self.expr.resultsName:
                # expose the default under the contained expression's results name as well
                tokens = ParseResults([ fallback ])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [ fallback ]
        return loc, tokens

    def __str__( self ):
        if hasattr(self, "name"):
            return self.name

        cached = self.strRepr
        if cached is None:
            cached = "[" + _ustr(self.expr) + "]"
            self.strRepr = cached

        return cached
4025
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        # the skipped text may be empty, when the target matches immediately
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if isinstance(failOn, basestring):
            self.failOn = ParserElement._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Scan forward from C{loc} one character at a time until the target
        expression matches.

        Returns C{(newloc, results)} where C{results} holds the skipped text,
        followed by the target's tokens when C{include} was requested.

        Raises C{ParseException} if the end of the input is reached without a
        match, or if the C{failOn} expression matches before the target does.
        """
        startloc = loc
        instrlen = len(instring)
        expr_parse = self.expr._parse
        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None and self_failOn_canParseNext(instring, tmploc):
                # A forbidden expression was reached before the target, so this is
                # not a match.  (A plain "break" here would skip the loop's else
                # clause and be reported as a successful skip up to this position.)
                raise ParseException(instring, loc, self.errmsg, self)

            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while 1:
                    try:
                        nextloc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break
                    if nextloc <= tmploc:
                        # zero-width ignore match: stop, otherwise this loop never ends
                        break
                    tmploc = nextloc

            try:
                # probe only: parse actions are not run while searching
                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real this time, honoring doActions
            loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult
4140
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """
        Assign the real expression to this placeholder and adopt its parsing
        characteristics.  A string is converted with
        C{ParserElement._literalStringClass}.  Returns C{self}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        # drop any cached string form built for a previous expression
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """Support C{fwd <<= expr}; identical to C{fwd << expr}."""
        return self << other

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only; the contained expression is not touched."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the contained expression once.  Returns C{self}."""
        if not self.streamlined:
            # set the flag first so a recursive grammar that reaches this element again stops here
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """
        Validate the contained expression unless this element is already in
        C{validateTrace}, then run the recursion check from this element.
        """
        if self not in validateTrace:
            # the default list is only ever copied, never mutated
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name, or C{"<ClassName>: ..."} when unnamed."""
        if hasattr(self,"name"):
            return self.name
        # The contained expression is deliberately not rendered: expanding it here
        # was stubbed out because it created awful memory and performance issues.
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """
        Return a copy of this element.  If no expression has been assigned yet,
        return a new C{Forward} that wraps this one, so a later assignment to
        the original is visible through the copy.
        """
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret
4221
class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always the fixed placeholder C{"..."}."""

    def __str__( self ):
        placeholder = "..."
        return placeholder
4225
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
    """
    def __init__( self, expr, savelist=False ):
        # savelist is accepted but intentionally not forwarded to the base class
        super(TokenConverter, self).__init__( expr )
        self.saveAsList = False
4233
class Combine(TokenConverter):
    """
    Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the input string;
    this can be disabled by specifying C{'adjacent=False'} in the constructor.

    Example::
        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine, self).__init__( expr )
        if adjacent:
            # stop the contained expressions from skipping whitespace ...
            self.leaveWhitespace()
        self.adjacent = adjacent
        # ... but let the Combine itself still skip leading whitespace
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore( self, other ):
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            # adjacent mode: register on this element only, bypassing the
            # ParseElementEnhance override that forwards to the contained expression
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        # start from an emptied copy of the incoming results
        retToks = tokenlist.copy()
        del retToks[:]
        combined = "".join(tokenlist._asStringList(self.joinString))
        retToks += ParseResults([ combined ], modal=self.modalResults)

        if self.resultsName and retToks.haskeys():
            return [ retToks ]
        return retToks
4277
class Group(TokenConverter):
    """
    Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.

    Example::
        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__( self, expr ):
        super(Group, self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # wrap the matched tokens so they appear as one nested element
        nested = [ tokenlist ]
        return nested
4298
class Dict(TokenConverter):
    """
    Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as an item key.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())
    prints::
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
    See more examples at L{ParseResults} of accessing fields by results name.
    """
    def __init__( self, expr ):
        super(Dict, self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        for offset, entry in enumerate(tokenlist):
            entry_len = len(entry)
            if entry_len == 0:
                continue

            # the first token of each entry is its key; integer keys are stored as stripped strings
            key = entry[0]
            if isinstance(key, int):
                key = _ustr(key).strip()

            if entry_len == 1:
                # key with no value
                value = ""
            elif entry_len == 2 and not isinstance(entry[1], ParseResults):
                # key with a single plain value
                value = entry[1]
            else:
                # everything after the key becomes the value
                remainder = entry.copy()
                del remainder[0]
                if len(remainder) != 1 or (isinstance(remainder, ParseResults) and remainder.haskeys()):
                    value = remainder
                else:
                    value = remainder[0]

            tokenlist[key] = _ParseResultsWithOffset(value, offset)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
4362
4363
class Suppress(TokenConverter):
    """
    Converter for ignoring the results of a parsed expression.

    Example::
        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))
    prints::
        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
    (See also L{delimitedList}.)
    """
    def postParse( self, instring, loc, tokenlist ):
        # discard whatever was matched
        nothing = []
        return nothing

    def suppress( self ):
        # already a Suppress, so return this same object
        return self
4388
4389
class OnlyOnce(object):
    """
    Wrapper for parse actions, to ensure they are only called once.

    After the wrapped action has completed once, further calls raise
    C{ParseException} until L{reset} is called.
    """
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self,s,l,t):
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        # marked only after the action returns, so an action that raises may be retried
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to be called again."""
        self.called = False
4405
def traceParseAction(f):
    """
    Decorator for debugging parse actions.

    When the parse action is called, this decorator writes
    C{">>entering I{method-name}(line: 'I{current_source_line}', I{parse_location}, I{matched_tokens})"} to C{sys.stderr}.
    When the parse action completes, the decorator writes C{"<<leaving"} followed by the returned value, or any exception that the parse action raised
    (the exception is then re-raised).

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three positional arguments are always (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is treated as the owning instance
            label = paArgs[0].__class__.__name__ + '.' + label
        emit = sys.stderr.write
        emit( ">>entering %s(line: '%s', %d, %r)\n" % (label,line(l,s),l,t) )
        try:
            outcome = f(*paArgs)
        except Exception as exc:
            emit( "<<leaving %s (exception: %s)\n" % (label,exc) )
            raise
        emit( "<<leaving %s (ret: %r)\n" % (label,outcome) )
        return outcome

    # present the wrapper under the wrapped action's name where possible
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
4446
4447#
4448# global helpers
4449#
def delimitedList( expr, delim=",", combine=False ):
    """
    Helper to define a delimited list of expressions - the delimiter defaults to ','.
    By default, the list elements and delimiters can have intervening whitespace, and
    comments, but this can be overridden by passing C{combine=True} in the constructor.
    If C{combine} is set to C{True}, the matching tokens are returned as a single token
    string, with the delimiters included; otherwise, the matching tokens are returned
    as a list of tokens, with the delimiters suppressed.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    dlName = "%s [%s %s]..." % (_ustr(expr), _ustr(delim), _ustr(expr))
    if combine:
        # keep the delimiters and fuse everything into one token
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    else:
        # drop the delimiters from the results
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    return listExpr.setName(dlName)
4468
def countedArray( expr, intExpr=None ):
    """
    Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

    If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.

    Example::
        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()

    def countFieldParseAction(s,l,t):
        # once the count is known, define the array body to match exactly that many items
        count = t[0]
        if count:
            body = Group(And([expr]*count))
        else:
            body = Group(empty)
        arrayExpr << body
        # suppress the count token itself
        return []

    if intExpr is not None:
        # work on a copy so the caller's expression does not gain our parse action
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
4499
def _flatten(L):
    """Return a new flat list holding the items of C{L}, with nested lists expanded recursively."""
    flat = []
    for item in L:
        if not isinstance(item, list):
            flat.append(item)
            continue
        flat.extend(_flatten(item))
    return flat
4508
def matchPreviousLiteral(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s,l,t):
        # redefine the repeater from whatever expr just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: require each one in sequence
            flat_tokens = _flatten(t.asList())
            rep << And(Literal(tok) for tok in flat_tokens)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
4536
def matchPreviousExpr(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because the repeat is matched with a
    copy of the expression, this will I{not} match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then their tokens are compared, so
    C{"1"} is compared with C{"10"}.
    Do I{not} use with packrat parsing enabled.

    Note that a parse action is added to C{expr} itself.
    """
    rep = Forward()
    rep <<= expr.copy()

    def copyTokenToRepeater(s,l,t):
        expectedTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s,l,t):
            # reject the repeat unless it produced the same flattened tokens
            if _flatten(t.asList()) != expectedTokens:
                raise ParseException("",0,"")

        # replace (not add) so that only the most recent match is compared against
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
4564
def _escapeRegexRangeChars(s):
    """Escape the characters of s that are special inside a regex C{[]} set."""
    # prefix each of \ ^ - ] with _bslash; the backslash itself is handled first so
    # that the prefixes added for the other characters are not escaped again
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special, _bslash + special)
    # spell out newline and tab as two-character escape sequences
    for raw, spelled in (("\n", r"\n"), ("\t", r"\t")):
        s = s.replace(raw, spelled)
    return _ustr(s)
4572
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals.  The alternatives are
    reordered so that a literal is always tested before any other literal that is a
    leading substring of it (longest-first testing), regardless of the input order;
    duplicate literals are removed.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, generate a single C{Regex}
          object; a C{L{MatchFirst}} of literals is generated instead if C{useRegex} is
          C{False}, if C{caseless=True}, or if creating the C{Regex} raises an exception

    Returns C{NoMatch()} if no literals are given.  If C{strs} is neither a string nor
    an iterable, a C{SyntaxWarning} is issued and C{NoMatch()} is returned.

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # isequal(a,b): a and b are the same literal
    # masks(a,b): a is a leading substring of b, so testing a first would hide b
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Reorder/dedupe in place.  For the symbol at position i, scan the symbols after it:
    #  - a duplicate is deleted;
    #  - a symbol that the current one masks is moved to just before it.
    # Either change breaks out of the scan, which is then repeated for the same i;
    # i only advances (for/else) once a full scan made no change.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            i += 1

    if not caseless and useRegex:
        #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
        try:
            # total length equal to the symbol count: build one [...] character set;
            # otherwise build an alternation of the escaped symbols
            if len(symbols)==len("".join(symbols)):
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # deliberate best effort: warn and fall through to the MatchFirst form below
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)


    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
4645
def dictOf( key, value ):
    """
    Helper to easily and clearly define a dictionary by specifying the respective patterns
    for the key and value.  Takes care of nesting the C{L{Dict}}, C{L{ZeroOrMore}}, and
    C{L{Group}} expressions in the proper order.  The key pattern can include delimiting
    markers or punctuation, as long as they are suppressed, thereby leaving the significant
    key text.  The value pattern can include named results, so that the C{Dict} results can
    include named token fields.

    Example::
        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())
    prints::
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # each key/value pair becomes its own group; Dict wraps the repeated groups
    keyValuePair = Group( key + value )
    return Dict( ZeroOrMore( keyValuePair ) )
4680
def originalTextFor(expr, asString=True):
    """
    Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # zero-width markers whose only token is the location at which they matched
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    matchExpr = startMarker("_original_start") + expr + endMarker("_original_end")

    if asString:
        def extractText(s,l,t):
            # replace all tokens with the slice of input between the two markers
            return s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # remove the two marker names, then replace the token list in place so
            # that the remaining results names are kept
            start = t.pop('_original_start')
            end = t.pop('_original_end')
            t[:] = [s[start:end]]

    matchExpr.setParseAction(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    return matchExpr
4717
def ungroup(expr):
    """
    Helper to undo pyparsing's default grouping of And expressions: the returned
    expression matches C{expr} and replaces its results with their first token.
    """
    converter = TokenConverter(expr)
    converter.setParseAction(lambda t:t[0])
    return converter
4724
def locatedExpr(expr):
    """
    Helper to decorate a returned token with its starting and ending locations in the input string.
    The returned group carries the following results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}

    Example::
        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)
    prints::
        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width expression whose only token is the location at which it matched
    locator = Empty().setParseAction(lambda s,l,t: l)
    startLoc = locator("locn_start")
    valueExpr = expr("value")
    # the end marker is a separate copy with leaveWhitespace() applied
    endLoc = locator.copy().leaveWhitespace()("locn_end")
    return Group(startLoc + valueExpr + endLoc)
4747
4748
# convenience constants for positional expressions: one shared, named instance of
# each positional class, so grammars can use them without instantiating their own
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")
4755
# Grammar for the bracketed character-set strings accepted by srange().

# backslash followed by one punctuation character; the token is that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# \x## or \0x## hex escape; the token is the character with that code.  The digits are
# taken by slicing after the 'x'/'X' marker: stripping the prefix with lstrip(r'\0x')
# removes a character *set*, so it also ate the leading zeros of the value
# ('\x00' -> '') and left an uppercase 'X' in place, and int(...,16) then raised.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0][t[0].lower().index('x')+1:],16)))
# \0## octal escape; the token is the character with that code
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
# any one of the escapes above, or a single character other than backslash and ']'
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
# 'a-z' style range, grouped so that it can be told apart from a single character
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# complete '[...]' expression; the optional leading '^' is captured as "negate" and the
# list of ranges/characters as "body"
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
4762
def srange(s):
    r"""
    Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.  If the input cannot be processed, an
    empty string is returned.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
    """
    def _expanded(part):
        # a grouped part is a (first, last) range; anything else is a single character
        if isinstance(part, ParseResults):
            first = ord(part[0])
            last = ord(part[1])
            return ''.join(unichr(code) for code in range(first, last+1))
        return part

    try:
        body = _reBracketExpr.parseString(s).body
        return "".join(_expanded(part) for part in body)
    except Exception:
        # deliberate best effort: any failure yields an empty character set
        return ""
4786
def matchOnlyAtCol(n):
    """
    Helper method for defining parse actions that require matching at a specific
    column in the input text.  The returned parse action raises C{ParseException}
    when the match location is not at column C{n}, and otherwise leaves the tokens
    untouched.
    """
    def verifyCol(strg,locn,toks):
        matchedCol = col(locn,strg)
        if matchedCol != n:
            raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
4796
def replaceWith(replStr):
    """
    Helper method for common parse actions that simply return a literal value: the
    returned parse action ignores its arguments and yields C{[replStr]}.  Especially
    useful when used with C{L{transformString<ParserElement.transformString>}()}.

    Example::
        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    def _replacer(s,l,t):
        return [replStr]
    return _replacer
4810
def removeQuotes(s,l,t):
    """
    Helper parse action for removing quotation marks from parsed quoted strings:
    returns the first token without its first and last characters.

    Example::
        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]
4824
def tokenMap(func, *args):
    """
    Helper to define a parse action by mapping a function to all elements of a ParseResults list.
    If any additional args are passed, they are forwarded to the given function as additional
    arguments after the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))},
    which will convert the parsed data to an integer using base 16.

    The returned parse action is named after C{func} (or after its class, if it has no
    C{__name__}).

    Example (compare the last to example in L{ParserElement.transformString}::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s,l,t):
        mapped = []
        for tokn in t:
            mapped.append(func(tokn, *args))
        return mapped

    # name the parse action after the mapped function
    try:
        classFallback = func.__class__.__name__
        func_name = getattr(func, '__name__', classFallback)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa
4868
# parse action built with tokenMap: every token is converted with _ustr and then upper-cased
upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

# parse action built with tokenMap: every token is converted with _ustr and then lower-cased
downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
4874
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

    C{tagStr} is either a string (converted to a C{Keyword}, caseless unless C{xml} is
    true) or an expression whose C{name} attribute is used as the tag name.  Returns the
    2-tuple C{(openTag, closeTag)}; both expressions get a C{tag} attribute holding the
    tag name.
    """
    if not isinstance(tagStr,basestring):
        resname = tagStr.name
    else:
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)

    tagAttrName = Word(alphas,alphanums+"_-:")
    if xml:
        # XML: every attribute has a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        tagAttr = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        # HTML: attribute names are lower-cased; the value is optional and may be
        # quoted with either quote character or left unquoted
        printablesLessRAbrack = "".join(c for c in printables if c not in ">")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        tagAttr = Group( tagAttrName.setParseAction(downcaseTokens) + \
                Optional( Suppress("=") + tagAttrValue ) )

    # "empty" is True for a self-closing tag (trailing '/'), False otherwise
    emptyFlag = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(tagAttr)) + emptyFlag + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" + the tag name in title case, with any ':'
    # separators removed
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
4903
def makeHTMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches
    tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values.
    Returns the opening and closing tag expressions as a 2-tuple.

    Example::
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
        a,a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
            print(link.link_text, '->', link.href)
    prints::
        pyparsing -> http://pyparsing.wikispaces.com
    """
    return _makeTags( tagStr, xml=False )
4922
def makeXMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for XML, given a tag name. Matches
    tags only in the given upper/lower case.  Returns the opening and closing tag
    expressions as a 2-tuple.

    Example: similar to L{makeHTMLTags}
    """
    return _makeTags( tagStr, xml=True )
4931
def withAttribute(*args,**attrDict):
    """
    Helper to create a validating parse action to be used with start tags created
    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
    with a required attribute value, to avoid false matches on common tags such as
    C{<TD>} or C{<DIV>}.

    Call C{withAttribute} with a series of attribute names and values. Specify the list
    of filter attributes names and values as:
     - keyword arguments, as in C{(align="right")}, or
     - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
    For attribute names with a namespace prefix, you must use the second or third form.
    If any positional name-value tuples are given, keyword arguments are ignored.
    Attribute names are looked up in the parsed tag exactly as given (tags built by
    C{makeHTMLTags} store their attribute names in lower case).

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    pairs = args[:] if args else attrDict.items()
    # materialize now, so that every entry is checked to be a (name, value) pair
    required = [(name, value) for name, value in pairs]

    def pa(s,l,tokens):
        for attrName, expected in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if expected != withAttribute.ANY_VALUE:
                actual = tokens[attrName]
                if actual != expected:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actual, expected))
    return pa
# sentinel meaning "the attribute must be present, with any value"
withAttribute.ANY_VALUE = object()
4996
def withClass(classname, namespace=''):
    """
    Simplified version of C{L{withAttribute}} when matching on a div class - made
    difficult because C{class} is a reserved word in Python.  The attribute tested is
    C{class}, or C{<namespace>:class} if a C{namespace} is given.

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    # passed through ** because "class" cannot be written as a keyword argument
    return withAttribute(**{classattr : classname})
5031
# holder for the associativity markers passed to infixNotation in each operator definition
opAssoc = _Constants()
# each marker is a distinct plain object; infixNotation tests for them with ==
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
5035
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions. The generated parser will also recognize the use
    of parentheses to override operator precedences (see example below).

    Note: if you define a deep operator list, you may see performance issues
    when using infixNotation. See L{ParserElement.enablePackrat} for a
    mechanism to potentially improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic operand of the nested
      expression grammar
     - opList - list of tuples, one for each operator precedence level in the
      expression grammar; each tuple is of the form
      (opExpr, numTerms, rightLeftAssoc, parseAction), where:
       - opExpr is the pyparsing expression for the operator;
          may also be a string, which will be converted to a Literal;
          if numTerms is 3, opExpr is a tuple or list of two expressions, for the
          two operators separating the 3 terms
       - numTerms is the number of terms for this operator (must
          be 1, 2, or 3)
       - rightLeftAssoc is the indicator whether the operator is
          right or left associative, using the pyparsing-defined
          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
       - parseAction is the parse action to be associated with
          expressions matching this operator expression (the
          parse action tuple member may be omitted); if the parse action
          is passed a tuple or list of functions, this is equivalent to
          calling C{setParseAction(*fn)} (L{ParserElement.setParseAction})
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if numTerms is 3 and opExpr is not a pair of expressions, if
    numTerms is not 1, 2, or 3, or if rightLeftAssoc is neither C{opAssoc.LEFT} nor
    C{opAssoc.RIGHT}.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    ret = Forward()
    # innermost level: a bare operand, or a parenthesized complete expression
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # the parse action member is optional; pad with None before unpacking
        # (tuple() so that an operator definition given as a list is accepted too)
        opExpr,arity,rightLeftAssoc,pa = (tuple(operDef) + (None,))[:4]
        if arity == 3:
            # validate before building the name: formatting "%s%s term" with anything
            # other than a 2-tuple raises TypeError, which used to mask this error and
            # also rejected a two-item list
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % opExpr
        thisExpr = Forward().setName(termName)
        # each matchExpr starts with a FollowedBy lookahead for one complete operation,
        # so that the Group is only attempted (and its parse action only run) when an
        # operator at this level is really present
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: operands are simply juxtaposed
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            # right-associative levels recurse through thisExpr on the right-hand side
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    # no operator expression: operands are simply juxtaposed
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction(*pa)
            else:
                matchExpr.setParseAction(pa)
        # this level matches either an operation at this precedence, or falls back to
        # the previous level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
5147
# backward-compatible alias: bound to the very same function object as infixNotation
operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""
5150
# Quoted-string expressions.  Each Regex matches the opening quote and the string body;
# the closing quote is matched by a separate literal and Combine joins the pieces into
# one token (quotes included).  The body accepts, repeatedly, any of:
#  - a character other than the quote character, newline, carriage return or backslash
#  - a doubled quote character
#  - a backslash followed by either a single character other than 'x', or 'x' and one
#    or more hex digits
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
# either of the two forms above, with the double-quoted form tried first
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
# a quoted string (either form) immediately preceded by the letter 'u'
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
5156
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """
    Helper that builds an expression for nested lists bracketed by an opening
    and a closing delimiter (C{"("} and C{")"} unless overridden).

    Parameters:
     - opener - delimiter that starts a nested list (default=C{"("}); may also be a pyparsing expression
     - closer - delimiter that ends a nested list (default=C{")"}); may also be a pyparsing expression
     - content - expression matching the items inside the nested lists (default=C{None})
     - ignoreExpr - expression whose matches may contain opening or closing
            delimiters that must not be counted as nesting (default=C{quotedString})

    When no C{content} expression is given, C{opener} and C{closer} must both
    be strings, and the nested expression captures all whitespace-delimited
    content between the delimiters as a list of separate values.

    Typical values for C{ignoreExpr} are quotedString or a comment
    expression.  Several expressions can be combined with an C{L{Or}} or
    C{L{MatchFirst}}.  The default is L{quotedString}; pass C{None} when no
    expressions are to be ignored.

    Raises C{ValueError} if C{opener} equals C{closer}, or if C{content} is
    omitted while C{opener} and C{closer} are not both strings.

    Example::
        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)

    prints::
        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener, basestring) and isinstance(closer, basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        stripToken = lambda t: t[0].strip()

        if len(opener) == 1 and len(closer) == 1:
            # single-character delimiters can simply be excluded from the
            # character set that makes up an item
            if ignoreExpr is None:
                content = (empty.copy()
                           + CharsNotIn(opener + closer + whiteChars).setParseAction(stripToken))
            else:
                content = Combine(
                    OneOrMore(~ignoreExpr
                              + CharsNotIn(opener + closer + whiteChars, exact=1))
                ).setParseAction(stripToken)
        else:
            # multi-character delimiters need explicit negative lookaheads
            # before each character that is consumed
            if ignoreExpr is None:
                content = Combine(
                    OneOrMore(~Literal(opener) + ~Literal(closer)
                              + CharsNotIn(whiteChars, exact=1))
                ).setParseAction(stripToken)
            else:
                content = Combine(
                    OneOrMore(~ignoreExpr
                              + ~Literal(opener) + ~Literal(closer)
                              + CharsNotIn(whiteChars, exact=1))
                ).setParseAction(stripToken)

    # the Forward lets the grammar refer to itself for the inner levels
    nested = Forward()
    if ignoreExpr is None:
        body = nested | content
    else:
        body = ignoreExpr | nested | content
    nested <<= Group( Suppress(opener) + ZeroOrMore( body ) + Suppress(closer) )
    nested.setName('nested %s%s expression' % (opener,closer))
    return nested
5246
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper that builds an expression for a block of statements grouped by
    space-delimited indentation, in the manner of block statements in Python
    source code.

    Parameters:
     - blockStatementExpr - expression defining the syntax of the statement
            that is repeated within the indented block
     - indentStack - list created by the caller to track the stack of
            indentation columns (multiple statementWithIndentedBlock
            expressions within a single grammar should share a common
            indentStack); it is modified in place while parsing
     - indent - boolean indicating whether the block must be indented beyond
            the current level; set to False for a block of left-most
            statements (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Note that a backslash-newline ignore expression is added to
    C{blockStatementExpr} itself.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def requirePeerColumn(instring, loc, toks):
        # a statement must start in the column on top of the stack
        if loc >= len(instring):
            return
        column = col(loc, instring)
        if column == indentStack[-1]:
            return
        if column > indentStack[-1]:
            raise ParseFatalException(instring, loc, "illegal nesting")
        raise ParseException(instring, loc, "not a peer entry")

    def pushDeeperColumn(instring, loc, toks):
        # entering a block: the column must lie beyond the current level
        column = col(loc, instring)
        if column <= indentStack[-1]:
            raise ParseException(instring, loc, "not a subentry")
        indentStack.append(column)

    def popToOuterColumn(instring, loc, toks):
        # leaving a block: the column must fall back to an enclosing level
        if loc >= len(instring):
            return
        column = col(loc, instring)
        isUnindent = (indentStack
                      and column < indentStack[-1]
                      and column <= indentStack[-2])
        if not isUnindent:
            raise ParseException(instring, loc, "not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(pushDeeperColumn)).setName('INDENT')
    PEER = Empty().setParseAction(requirePeerColumn).setName('')
    UNDENT = Empty().setParseAction(popToOuterColumn).setName('UNINDENT')

    # one or more statements, each checked to sit at the current indent level
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
    else:
        smExpr = Group( Optional(NL) + statements )

    # allow backslash line continuations inside the statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
5360
# 8-bit characters in the ranges 0xc0-0xff, leaving out 0xd7 and 0xf7,
# which are listed in punc8bit instead
alphas8bit = srange(
    r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]"
)
# 8-bit characters 0xa1-0xbf plus 0xd7 and 0xf7
punc8bit = srange(
    r"[\0xa1-\0xbf\0xd7\0xf7]"
)

# opening/closing tag expressions for a tag with any name
anyOpenTag, anyCloseTag = makeHTMLTags(
    Word(alphas, alphanums+"_:").setName('any tag')
)

# entity name -> replacement character; names and characters pair up by position
_htmlEntityMap = dict(
    zip("gt lt amp nbsp quot apos".split(),
        '><& "\'')
)

# matches "&<name>;" for any name in the map, exposing the name as "entity"
commonHTMLEntity = Regex(
    '&(?P<entity>' + '|'.join(_htmlEntityMap) + ");"
).setName("common HTML entity")
def replaceHTMLEntity(t):
    """
    Helper parser action to replace common HTML entities with their special characters.

    Looks up the C{entity} named result of the matched tokens in the entity
    map and returns the mapped character, or C{None} if the name is not in the map.
    """
    entityName = t.entity
    return _htmlEntityMap.get(entityName)
5370
# Comment expressions are very common and easy to get subtly wrong, so
# ready-made versions are provided here.
cStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'
).setName("C style comment")
"Comment of the form C{/* ... */}"

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

# everything up to the end of the current line, without skipping leading whitespace
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")

# a backslash immediately before the newline continues the comment onto the next line
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

cppStyleComment = Combine(
    (Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/')
    | dblSlashComment
).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# one unquoted list item: printable words (no commas), optionally joined by
# runs of blanks that are not followed by a comma or the end of the line
_commasepitem = Combine(
    OneOrMore(
        Word(printables, excludeChars=',')
        + Optional( Word(" \t") + ~Literal(",") + ~LineEnd() )
    )
).streamline().setName("commaItem")

commaSeparatedList = delimitedList(
    Optional( quotedString.copy() | _commasepitem, default="")
).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
   This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""
5397
5398# some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
    """
    Namespace of common low-level expressions that may be useful in jump-starting parser development:
     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
     - common L{programming identifiers<identifier>}
     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
     - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
     - L{UUID<uuid>}
     - L{comma-separated list<comma_separated_list>}
    Parse actions:
     - C{L{convertToInteger}}
     - C{L{convertToFloat}}
     - C{L{convertToDate}}
     - C{L{convertToDatetime}}
     - C{L{stripHTMLTags}}
     - C{L{upcaseTokens}}
     - C{L{downcaseTokens}}

    Example::
        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')
    prints::
        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action that converts parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action that converts parsed numbers to Python float
    """

    integer = (
        Word(nums)
        .setName("integer")
        .setParseAction(convertToInteger)
    )
    """expression that parses an unsigned integer, returns an int"""

    hex_integer = (
        Word(hexnums)
        .setName("hex integer")
        .setParseAction(tokenMap(int,16))
    )
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = (
        Regex(r'[+-]?\d+')
        .setName("signed integer")
        .setParseAction(convertToInteger)
    )
    """expression that parses an integer with optional leading sign, returns an int"""

    # numerator and denominator are copies of signed_integer converted to
    # float, so the division below yields a float
    fraction = (
        signed_integer().setParseAction(convertToFloat)
        + '/'
        + signed_integer().setParseAction(convertToFloat)
    ).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    fraction.addParseAction(lambda t: t[0]/t[-1])

    mixed_integer = (
        fraction
        | signed_integer + Optional(Optional('-').suppress() + fraction)
    ).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    # the integer part and the fraction part (when both are present) are added together
    mixed_integer.addParseAction(sum)

    real = (
        Regex(r'[+-]?\d+\.\d*')
        .setName("real number")
        .setParseAction(convertToFloat)
    )
    """expression that parses a floating point number and returns a float"""

    sci_real = (
        Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)')
        .setName("real number with scientific notation")
        .setParseAction(convertToFloat)
    )
    """expression that parses a floating point number with optional scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = (
        Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?')
        .setName("fnumber")
        .setParseAction(convertToFloat)
    )
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(
        r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}'
    ).setName("IPv4 address")
    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"

    # one group of 1 to 4 hex digits
    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    # eight groups separated by ':'
    _full_ipv6_address = (
        _ipv6_part + (':' + _ipv6_part)*7
    ).setName("full IPv6 address")
    # abbreviated form using '::', with optional groups on either side
    _short_ipv6_address = (
        Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))
        + "::"
        + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))
    ).setName("short IPv6 address")
    # the abbreviated form must contain fewer than 8 groups in total
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine(
        (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")
    ).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # the first delimiter found is captured and must be repeated between all groups
    mac_address = Regex(
        r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}'
    ).setName("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper that creates a parse action for converting a parsed date string to a Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})

        The returned parse action raises C{ParseException} when the first
        token does not match C{fmt}.

        Example::
            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))
        prints::
            [datetime.date(1999, 12, 31)]
        """
        def toDate(instring, loc, toks):
            try:
                parsed = datetime.strptime(toks[0], fmt)
            except ValueError as err:
                # report a bad date as an ordinary parse failure
                raise ParseException(instring, loc, str(err))
            return parsed.date()
        return toDate

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """
        Helper that creates a parse action for converting a parsed datetime string to a Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

        The returned parse action raises C{ParseException} when the first
        token does not match C{fmt}.

        Example::
            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))
        prints::
            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def toDatetime(instring, loc, toks):
            try:
                parsed = datetime.strptime(toks[0], fmt)
            except ValueError as err:
                # report a bad datetime as an ordinary parse failure
                raise ParseException(instring, loc, str(err))
            return parsed
        return toDatetime

    iso8601_date = Regex(
        r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?'
    ).setName("ISO8601 date")
    "ISO8601 date (C{yyyy-mm-dd})"

    iso8601_datetime = Regex(
        r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?'
    ).setName("ISO8601 datetime")
    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

    uuid = Regex(
        r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}'
    ).setName("UUID")
    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

    # matches (and suppresses) any opening or closing tag
    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """
        Parse action that removes HTML tags from the first matched token

        Example::
            # strip HTML links from normal text
            text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
            td,td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end

            print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
        """
        htmlText = tokens[0]
        return pyparsing_common._html_stripper.transformString(htmlText)

    # one unquoted list item: printable words (no commas), each optionally
    # followed by blanks, stopping at a comma or the end of the line
    _commasepitem = Combine(
        OneOrMore(
            ~Literal(",") + ~LineEnd()
            + Word(printables, excludeChars=',')
            + Optional( White(" \t") )
        )
    ).streamline().setName("commaItem")
    comma_separated_list = delimitedList(
        Optional( quotedString.copy() | _commasepitem, default="")
    ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    upcaseTokens = staticmethod(
        tokenMap(lambda t: _ustr(t).upper())
    )
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(
        tokenMap(lambda t: _ustr(t).lower())
    )
    """Parse action to convert tokens to lower case."""
5671
5672
if __name__ == "__main__":

    # Self-test: a tiny "select <columns> from <tables>" grammar, followed by
    # demonstrations of a few pyparsing_common expressions.
    selectKeyword = CaselessLiteral("select")
    fromKeyword = CaselessLiteral("from")

    namePart = Word(alphas, alphanums + "_$")

    # column names may be dotted; they are combined and upper-cased
    dottedColumn = delimitedList(namePart, ".", combine=True).setParseAction(upcaseTokens)
    columnGroup = Group(delimitedList(dottedColumn)).setName("columns")
    columnChoice = ('*' | columnGroup)

    # table names follow the same dotted, upper-cased form
    dottedTable = delimitedList(namePart, ".", combine=True).setParseAction(upcaseTokens)
    tableGroup = Group(delimitedList(dottedTable)).setName("tables")

    selectStatement = (
        selectKeyword("command")
        + columnChoice("columns")
        + fromKeyword
        + tableGroup("tables")
    )

    # demo runTests method, including embedded comments in test string
    selectStatement.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    # any int or real number, returned as the appropriate type
    pyparsing_common.number.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # hex numbers
    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    # have the uuid expression return uuid.UUID objects for this demo
    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)
5743