1# module pyparsing.py
2#
3# Copyright (c) 2003-2016  Paul T. McGuire
4#
5# Permission is hereby granted, free of charge, to any person obtaining
6# a copy of this software and associated documentation files (the
7# "Software"), to deal in the Software without restriction, including
8# without limitation the rights to use, copy, modify, merge, publish,
9# distribute, sublicense, and/or sell copies of the Software, and to
10# permit persons to whom the Software is furnished to do so, subject to
11# the following conditions:
12#
13# The above copyright notice and this permission notice shall be
14# included in all copies or substantial portions of the Software.
15#
16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23#
24# flake8: noqa
25
26__doc__ = \
27"""
28pyparsing module - Classes and methods to define and execute parsing grammars
29
30The pyparsing module is an alternative approach to creating and executing simple grammars,
31vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
32don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
33provides a library of classes that you use to construct the grammar directly in Python.
34
35Here is a program to parse "Hello, World!" (or any greeting of the form
36C{"<salutation>, <addressee>!"}), built up using L{Word}, L{Literal}, and L{And} elements
37(L{'+'<ParserElement.__add__>} operator gives L{And} expressions, strings are auto-converted to
38L{Literal} expressions)::
39
40    from pyparsing import Word, alphas
41
42    # define grammar of a greeting
43    greet = Word(alphas) + "," + Word(alphas) + "!"
44
45    hello = "Hello, World!"
46    print (hello, "->", greet.parseString(hello))
47
48The program outputs the following::
49
50    Hello, World! -> ['Hello', ',', 'World', '!']
51
52The Python representation of the grammar is quite readable, owing to the self-explanatory
53class names, and the use of '+', '|' and '^' operators.
54
55The L{ParseResults} object returned from L{ParserElement.parseString<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
56object with named attributes.
57
58The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
59 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
60 - quoted strings
61 - embedded comments
62"""
63
# Release identification for this copy of the module.
__version__ = "2.2.0"
# Timestamp string recorded together with the version number.
__versionTime__ = "06 Mar 2017 02:06 UTC"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
67
68import string
69from weakref import ref as wkref
70import copy
71import sys
72import warnings
73import re
74import sre_constants
75import collections
76import pprint
77import traceback
78import types
79from datetime import datetime
80
81try:
82    from _thread import RLock
83except ImportError:
84    from threading import RLock
85
86try:
87    from collections import OrderedDict as _OrderedDict
88except ImportError:
89    try:
90        from ordereddict import OrderedDict as _OrderedDict
91    except ImportError:
92        _OrderedDict = None
93
94#~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
95
# Public API of the module: the names bound by a star-import of this module.
# Classes are listed first, followed by module-level helpers and constants.
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
'CloseMatch', 'tokenMap', 'pyparsing_common',
]
116
# Interpreter version as a (major, minor, micro) tuple; PY_3 selects the
# compatibility shims defined below.
system_version = tuple(sys.version_info)[:3]
PY_3 = system_version[0] == 3
if not PY_3:
    # Python 2.x: largest native int, and make ``range`` lazy like in Python 3
    _MAX_INT = sys.maxint
    range = xrange

    def _ustr(obj):
        """
        Unicode-tolerant stand-in for C{str(obj)}.

        A C{unicode} object is returned untouched.  Otherwise C{str(obj)} is
        attempted first, so existing behaviour is unchanged whenever it
        succeeds.  If that raises C{UnicodeEncodeError}, the object is encoded
        with the default encoding using XML character references, and each
        C{&#NNN;} reference is then rewritten as a C{\\u} escape followed by
        the code point in hex.
        """
        if isinstance(obj, unicode):
            return obj

        try:
            return str(obj)
        except UnicodeEncodeError:
            encoded = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            # Regex is defined later in this module; it is only looked up when
            # this fallback branch actually runs
            charref = Regex(r'&#\d+;')
            charref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return charref.transformString(encoded)

    # collect the single-argument builtins usable as parse actions, skipping
    # any that this interpreter does not provide
    singleArgBuiltins = []
    import __builtin__
    for fname in "sum len sorted reversed list tuple set any all min max".split():
        if hasattr(__builtin__, fname):
            singleArgBuiltins.append(getattr(__builtin__, fname))

else:
    # Python 3.x: alias the names that only exist in Python 2
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    _ustr = str

    # single-argument builtins that can be used as parse actions
    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]

# type object of generator expressions, used for isinstance checks
_generatorType = type((item for item in range(1)))
162
def _xml_escape(data):
    """Return data with &, >, <, " and ' replaced by their XML entity references."""

    # '&' is handled first so that the ampersands introduced by the other
    # entities are not escaped a second time
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for symbol, entity in replacements:
        data = data.replace(symbol, entity)
    return data
172
class _Constants(object):
    """Empty class whose instances serve as plain attribute containers."""
175
# Character-set strings for building expressions.
nums       = string.digits
alphas     = string.ascii_uppercase + string.ascii_lowercase
alphanums  = alphas + nums
hexnums    = nums + "ABCDEFabcdef"
_bslash    = "\\"
# every printable character that is not whitespace
printables = "".join(filter(lambda c: c not in string.whitespace, string.printable))
182
class ParseBaseException(Exception):
    """Common base class for every exception raised while parsing."""
    # Performance tuning: instances are created in very large numbers, so the
    # constructor is deliberately kept as small as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """
        Record where and why parsing failed.

        When C{msg} is omitted, C{pstr} is taken to be the message and the
        stored input string is left empty.
        """
        self.loc = loc
        if msg is not None:
            self.msg = msg
            self.pstr = pstr
        else:
            self.msg = pstr
            self.pstr = ""
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        Internal factory: build an instance of C{cls} from another parse
        exception, copying its string, location, message and parser element.
        Avoids C{__init__} signature conflicts among subclasses.
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__( self, aname ):
        """Compute the location attributes on demand:
            - lineno - line number of the exception text
            - col / column - column number of the exception text
            - line - the line containing the exception text
           Any other missing attribute raises C{AttributeError}.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname in ("col", "column"):
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        details = ( self.msg, self.loc, self.lineno, self.column )
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Return the input line on which the exception occurred, stripped of
           surrounding whitespace, with C{markerString} inserted at the
           exception column (no marker is inserted if it is empty).
        """
        text = self.line
        split_at = self.column - 1
        if markerString:
            text = text[:split_at] + markerString + text[split_at:]
        return text.strip()

    def __dir__(self):
        # advertise the computed attributes in addition to the class members
        return ["lineno", "col", "line"] + dir(type(self))
238
class ParseException(ParseBaseException):
    """
    Exception raised when a parse expression does not match the input;
    the following attributes are available by name:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text

    Example::
        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::
        Expected integer (at char 0), (line:1, col:1)
        column: 1
    """
259
class ParseFatalException(ParseBaseException):
    """Exception that user code can raise when inconsistent parse content
       is found; it stops all parsing immediately."""
264
class ParseSyntaxException(ParseFatalException):
    """Same as L{ParseFatalException}, but raised internally when an
       L{ErrorStop<And._ErrorStop>} ('-' operator) signals that parsing must stop
       immediately because a syntax error that cannot be backtracked was found."""
270
271#~ class ReparseException(ParseBaseException):
272    #~ """Experimental class - parse actions can raise this exception to cause
273       #~ pyparsing to reparse the input string:
274        #~ - with a modified input string, and/or
275        #~ - with a modified start location
276       #~ Set the values of the ReparseException in the constructor, and raise the
277       #~ exception in a parse action to cause pyparsing to use the new string/location.
278       #~ Setting the values as None causes no change to be made.
279       #~ """
280    #~ def __init_( self, newstring, restartLoc ):
281        #~ self.newParseText = newstring
282        #~ self.reparseLoc = restartLoc
283
class RecursiveGrammarException(Exception):
    """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        """Keep the trace of parse elements that led to the recursion."""
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple: a bare tuple on the right of % would be
        # unpacked as several format arguments and raise TypeError.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
291
class _ParseResultsWithOffset(object):
    """Two-item holder: a value (index 0) paired with an offset (index 1)."""
    def __init__(self,p1,p2):
        self.tup = (p1,p2)

    def __getitem__(self,i):
        # plain tuple indexing; this also makes ``value, offset = obj`` unpacking work
        return self.tup[i]

    def __repr__(self):
        # only the value is shown, the offset is omitted
        value = self.tup[0]
        return repr(value)

    def setOffset(self,i):
        """Replace the stored offset, keeping the value."""
        value = self.tup[0]
        self.tup = (value, i)
301
302class ParseResults(object):
303    """
304    Structured parse results, to provide multiple means of access to the parsed data:
305       - as a list (C{len(results)})
306       - by list index (C{results[0], results[1]}, etc.)
307       - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
308
309    Example::
310        integer = Word(nums)
311        date_str = (integer.setResultsName("year") + '/'
312                        + integer.setResultsName("month") + '/'
313                        + integer.setResultsName("day"))
314        # equivalent form:
315        # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
316
317        # parseString returns a ParseResults object
318        result = date_str.parseString("1999/12/31")
319
320        def test(s, fn=repr):
321            print("%s -> %s" % (s, fn(eval(s))))
322        test("list(result)")
323        test("result[0]")
324        test("result['month']")
325        test("result.day")
326        test("'month' in result")
327        test("'minutes' in result")
328        test("result.dump()", str)
329    prints::
330        list(result) -> ['1999', '/', '12', '/', '31']
331        result[0] -> '1999'
332        result['month'] -> '12'
333        result.day -> '31'
334        'month' in result -> True
335        'minutes' in result -> False
336        result.dump() -> ['1999', '/', '12', '/', '31']
337        - day: 31
338        - month: 12
339        - year: 1999
340    """
341    def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
342        if isinstance(toklist, cls):
343            return toklist
344        retobj = object.__new__(cls)
345        retobj.__doinit = True
346        return retobj
347
348    # Performance tuning: we construct a *lot* of these, so keep this
349    # constructor as small and fast as possible
350    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
351        if self.__doinit:
352            self.__doinit = False
353            self.__name = None
354            self.__parent = None
355            self.__accumNames = {}
356            self.__asList = asList
357            self.__modal = modal
358            if toklist is None:
359                toklist = []
360            if isinstance(toklist, list):
361                self.__toklist = toklist[:]
362            elif isinstance(toklist, _generatorType):
363                self.__toklist = list(toklist)
364            else:
365                self.__toklist = [toklist]
366            self.__tokdict = dict()
367
368        if name is not None and name:
369            if not modal:
370                self.__accumNames[name] = 0
371            if isinstance(name,int):
372                name = _ustr(name) # will always return a str, but use _ustr for consistency
373            self.__name = name
374            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
375                if isinstance(toklist,basestring):
376                    toklist = [ toklist ]
377                if asList:
378                    if isinstance(toklist,ParseResults):
379                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
380                    else:
381                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
382                    self[name].__name = name
383                else:
384                    try:
385                        self[name] = toklist[0]
386                    except (KeyError,TypeError,IndexError):
387                        self[name] = toklist
388
389    def __getitem__( self, i ):
390        if isinstance( i, (int,slice) ):
391            return self.__toklist[i]
392        else:
393            if i not in self.__accumNames:
394                return self.__tokdict[i][-1][0]
395            else:
396                return ParseResults([ v[0] for v in self.__tokdict[i] ])
397
398    def __setitem__( self, k, v, isinstance=isinstance ):
399        if isinstance(v,_ParseResultsWithOffset):
400            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
401            sub = v[0]
402        elif isinstance(k,(int,slice)):
403            self.__toklist[k] = v
404            sub = v
405        else:
406            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
407            sub = v
408        if isinstance(sub,ParseResults):
409            sub.__parent = wkref(self)
410
411    def __delitem__( self, i ):
412        if isinstance(i,(int,slice)):
413            mylen = len( self.__toklist )
414            del self.__toklist[i]
415
416            # convert int to slice
417            if isinstance(i, int):
418                if i < 0:
419                    i += mylen
420                i = slice(i, i+1)
421            # get removed indices
422            removed = list(range(*i.indices(mylen)))
423            removed.reverse()
424            # fixup indices in token dictionary
425            for name,occurrences in self.__tokdict.items():
426                for j in removed:
427                    for k, (value, position) in enumerate(occurrences):
428                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
429        else:
430            del self.__tokdict[i]
431
432    def __contains__( self, k ):
433        return k in self.__tokdict
434
435    def __len__( self ): return len( self.__toklist )
436    def __bool__(self): return ( not not self.__toklist )
437    __nonzero__ = __bool__
438    def __iter__( self ): return iter( self.__toklist )
439    def __reversed__( self ): return iter( self.__toklist[::-1] )
440    def _iterkeys( self ):
441        if hasattr(self.__tokdict, "iterkeys"):
442            return self.__tokdict.iterkeys()
443        else:
444            return iter(self.__tokdict)
445
446    def _itervalues( self ):
447        return (self[k] for k in self._iterkeys())
448
449    def _iteritems( self ):
450        return ((k, self[k]) for k in self._iterkeys())
451
452    if PY_3:
453        keys = _iterkeys
454        """Returns an iterator of all named result keys (Python 3.x only)."""
455
456        values = _itervalues
457        """Returns an iterator of all named result values (Python 3.x only)."""
458
459        items = _iteritems
460        """Returns an iterator of all named result key-value tuples (Python 3.x only)."""
461
462    else:
463        iterkeys = _iterkeys
464        """Returns an iterator of all named result keys (Python 2.x only)."""
465
466        itervalues = _itervalues
467        """Returns an iterator of all named result values (Python 2.x only)."""
468
469        iteritems = _iteritems
470        """Returns an iterator of all named result key-value tuples (Python 2.x only)."""
471
472        def keys( self ):
473            """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
474            return list(self.iterkeys())
475
476        def values( self ):
477            """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
478            return list(self.itervalues())
479
480        def items( self ):
481            """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
482            return list(self.iteritems())
483
484    def haskeys( self ):
485        """Since keys() returns an iterator, this method is helpful in bypassing
486           code that looks for the existence of any defined results names."""
487        return bool(self.__tokdict)
488
489    def pop( self, *args, **kwargs):
490        """
491        Removes and returns item at specified index (default=C{last}).
492        Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
493        argument or an integer argument, it will use C{list} semantics
494        and pop tokens from the list of parsed tokens. If passed a
495        non-integer argument (most likely a string), it will use C{dict}
496        semantics and pop the corresponding value from any defined
497        results names. A second default return value argument is
498        supported, just as in C{dict.pop()}.
499
500        Example::
501            def remove_first(tokens):
502                tokens.pop(0)
503            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
504            print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']
505
506            label = Word(alphas)
507            patt = label("LABEL") + OneOrMore(Word(nums))
508            print(patt.parseString("AAB 123 321").dump())
509
510            # Use pop() in a parse action to remove named result (note that corresponding value is not
511            # removed from list form of results)
512            def remove_LABEL(tokens):
513                tokens.pop("LABEL")
514                return tokens
515            patt.addParseAction(remove_LABEL)
516            print(patt.parseString("AAB 123 321").dump())
517        prints::
518            ['AAB', '123', '321']
519            - LABEL: AAB
520
521            ['AAB', '123', '321']
522        """
523        if not args:
524            args = [-1]
525        for k,v in kwargs.items():
526            if k == 'default':
527                args = (args[0], v)
528            else:
529                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
530        if (isinstance(args[0], int) or
531                        len(args) == 1 or
532                        args[0] in self):
533            index = args[0]
534            ret = self[index]
535            del self[index]
536            return ret
537        else:
538            defaultvalue = args[1]
539            return defaultvalue
540
541    def get(self, key, defaultValue=None):
542        """
543        Returns named result matching the given key, or if there is no
544        such name, then returns the given C{defaultValue} or C{None} if no
545        C{defaultValue} is specified.
546
547        Similar to C{dict.get()}.
548
549        Example::
550            integer = Word(nums)
551            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
552
553            result = date_str.parseString("1999/12/31")
554            print(result.get("year")) # -> '1999'
555            print(result.get("hour", "not specified")) # -> 'not specified'
556            print(result.get("hour")) # -> None
557        """
558        if key in self:
559            return self[key]
560        else:
561            return defaultValue
562
563    def insert( self, index, insStr ):
564        """
565        Inserts new element at location index in the list of parsed tokens.
566
567        Similar to C{list.insert()}.
568
569        Example::
570            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
571
572            # use a parse action to insert the parse location in the front of the parsed results
573            def insert_locn(locn, tokens):
574                tokens.insert(0, locn)
575            print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
576        """
577        self.__toklist.insert(index, insStr)
578        # fixup indices in token dictionary
579        for name,occurrences in self.__tokdict.items():
580            for k, (value, position) in enumerate(occurrences):
581                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
582
583    def append( self, item ):
584        """
585        Add single element to end of ParseResults list of elements.
586
587        Example::
588            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
589
590            # use a parse action to compute the sum of the parsed integers, and add it to the end
591            def append_sum(tokens):
592                tokens.append(sum(map(int, tokens)))
593            print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
594        """
595        self.__toklist.append(item)
596
597    def extend( self, itemseq ):
598        """
599        Add sequence of elements to end of ParseResults list of elements.
600
601        Example::
602            patt = OneOrMore(Word(alphas))
603
604            # use a parse action to append the reverse of the matched strings, to make a palindrome
605            def make_palindrome(tokens):
606                tokens.extend(reversed([t[::-1] for t in tokens]))
607                return ''.join(tokens)
608            print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
609        """
610        if isinstance(itemseq, ParseResults):
611            self += itemseq
612        else:
613            self.__toklist.extend(itemseq)
614
615    def clear( self ):
616        """
617        Clear all elements and results names.
618        """
619        del self.__toklist[:]
620        self.__tokdict.clear()
621
622    def __getattr__( self, name ):
623        try:
624            return self[name]
625        except KeyError:
626            return ""
627
628        if name in self.__tokdict:
629            if name not in self.__accumNames:
630                return self.__tokdict[name][-1][0]
631            else:
632                return ParseResults([ v[0] for v in self.__tokdict[name] ])
633        else:
634            return ""
635
636    def __add__( self, other ):
637        ret = self.copy()
638        ret += other
639        return ret
640
641    def __iadd__( self, other ):
642        if other.__tokdict:
643            offset = len(self.__toklist)
644            addoffset = lambda a: offset if a<0 else a+offset
645            otheritems = other.__tokdict.items()
646            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
647                                for (k,vlist) in otheritems for v in vlist]
648            for k,v in otherdictitems:
649                self[k] = v
650                if isinstance(v[0],ParseResults):
651                    v[0].__parent = wkref(self)
652
653        self.__toklist += other.__toklist
654        self.__accumNames.update( other.__accumNames )
655        return self
656
657    def __radd__(self, other):
658        if isinstance(other,int) and other == 0:
659            # useful for merging many ParseResults using sum() builtin
660            return self.copy()
661        else:
662            # this may raise a TypeError - so be it
663            return other + self
664
665    def __repr__( self ):
666        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
667
668    def __str__( self ):
669        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
670
671    def _asStringList( self, sep='' ):
672        out = []
673        for item in self.__toklist:
674            if out and sep:
675                out.append(sep)
676            if isinstance( item, ParseResults ):
677                out += item._asStringList()
678            else:
679                out.append( _ustr(item) )
680        return out
681
682    def asList( self ):
683        """
684        Returns the parse results as a nested list of matching tokens, all converted to strings.
685
686        Example::
687            patt = OneOrMore(Word(alphas))
688            result = patt.parseString("sldkj lsdkj sldkj")
689            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
690            print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
691
692            # Use asList() to create an actual list
693            result_list = result.asList()
694            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
695        """
696        return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
697
698    def asDict( self ):
699        """
700        Returns the named parse results as a nested dictionary.
701
702        Example::
703            integer = Word(nums)
704            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
705
706            result = date_str.parseString('12/31/1999')
707            print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
708
709            result_dict = result.asDict()
710            print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
711
712            # even though a ParseResults supports dict-like access, sometime you just need to have a dict
713            import json
714            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
715            print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
716        """
717        if PY_3:
718            item_fn = self.items
719        else:
720            item_fn = self.iteritems
721
722        def toItem(obj):
723            if isinstance(obj, ParseResults):
724                if obj.haskeys():
725                    return obj.asDict()
726                else:
727                    return [toItem(v) for v in obj]
728            else:
729                return obj
730
731        return dict((k,toItem(v)) for k,v in item_fn())
732
733    def copy( self ):
734        """
735        Returns a new copy of a C{ParseResults} object.
736        """
737        ret = ParseResults( self.__toklist )
738        ret.__tokdict = self.__tokdict.copy()
739        ret.__parent = self.__parent
740        ret.__accumNames.update( self.__accumNames )
741        ret.__name = self.__name
742        return ret
743
744    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
745        """
746        (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
747        """
748        nl = "\n"
749        out = []
750        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
751                                                            for v in vlist)
752        nextLevelIndent = indent + "  "
753
754        # collapse out indents if formatting is not desired
755        if not formatted:
756            indent = ""
757            nextLevelIndent = ""
758            nl = ""
759
760        selfTag = None
761        if doctag is not None:
762            selfTag = doctag
763        else:
764            if self.__name:
765                selfTag = self.__name
766
767        if not selfTag:
768            if namedItemsOnly:
769                return ""
770            else:
771                selfTag = "ITEM"
772
773        out += [ nl, indent, "<", selfTag, ">" ]
774
775        for i,res in enumerate(self.__toklist):
776            if isinstance(res,ParseResults):
777                if i in namedItems:
778                    out += [ res.asXML(namedItems[i],
779                                        namedItemsOnly and doctag is None,
780                                        nextLevelIndent,
781                                        formatted)]
782                else:
783                    out += [ res.asXML(None,
784                                        namedItemsOnly and doctag is None,
785                                        nextLevelIndent,
786                                        formatted)]
787            else:
788                # individual token, see if there is a name for it
789                resTag = None
790                if i in namedItems:
791                    resTag = namedItems[i]
792                if not resTag:
793                    if namedItemsOnly:
794                        continue
795                    else:
796                        resTag = "ITEM"
797                xmlBodyText = _xml_escape(_ustr(res))
798                out += [ nl, nextLevelIndent, "<", resTag, ">",
799                                                xmlBodyText,
800                                                "</", resTag, ">" ]
801
802        out += [ nl, indent, "</", selfTag, ">" ]
803        return "".join(out)
804
805    def __lookup(self,sub):
806        for k,vlist in self.__tokdict.items():
807            for v,loc in vlist:
808                if sub is v:
809                    return k
810        return None
811
812    def getName(self):
813        r"""
814        Returns the results name for this token expression. Useful when several
815        different expressions might match at a particular location.
816
817        Example::
818            integer = Word(nums)
819            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
820            house_number_expr = Suppress('#') + Word(nums, alphanums)
821            user_data = (Group(house_number_expr)("house_number")
822                        | Group(ssn_expr)("ssn")
823                        | Group(integer)("age"))
824            user_info = OneOrMore(user_data)
825
826            result = user_info.parseString("22 111-22-3333 #221B")
827            for item in result:
828                print(item.getName(), ':', item[0])
829        prints::
830            age : 22
831            ssn : 111-22-3333
832            house_number : 221B
833        """
834        if self.__name:
835            return self.__name
836        elif self.__parent:
837            par = self.__parent()
838            if par:
839                return par.__lookup(self)
840            else:
841                return None
842        elif (len(self) == 1 and
843               len(self.__tokdict) == 1 and
844               next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
845            return next(iter(self.__tokdict.keys()))
846        else:
847            return None
848
849    def dump(self, indent='', depth=0, full=True):
850        """
851        Diagnostic method for listing out the contents of a C{ParseResults}.
852        Accepts an optional C{indent} argument so that this string can be embedded
853        in a nested display of other data.
854
855        Example::
856            integer = Word(nums)
857            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
858
859            result = date_str.parseString('12/31/1999')
860            print(result.dump())
861        prints::
862            ['12', '/', '31', '/', '1999']
863            - day: 1999
864            - month: 31
865            - year: 12
866        """
867        out = []
868        NL = '\n'
869        out.append( indent+_ustr(self.asList()) )
870        if full:
871            if self.haskeys():
872                items = sorted((str(k), v) for k,v in self.items())
873                for k,v in items:
874                    if out:
875                        out.append(NL)
876                    out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
877                    if isinstance(v,ParseResults):
878                        if v:
879                            out.append( v.dump(indent,depth+1) )
880                        else:
881                            out.append(_ustr(v))
882                    else:
883                        out.append(repr(v))
884            elif any(isinstance(vv,ParseResults) for vv in self):
885                v = self
886                for i,vv in enumerate(v):
887                    if isinstance(vv,ParseResults):
888                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),vv.dump(indent,depth+1) ))
889                    else:
890                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),_ustr(vv)))
891
892        return "".join(out)
893
894    def pprint(self, *args, **kwargs):
895        """
896        Pretty-printer for parsed results as a list, using the C{pprint} module.
897        Accepts additional positional or keyword args as defined for the
898        C{pprint.pprint} method. (U{https://docs.python.org/3/library/pprint.html#pprint.pprint})
899
900        Example::
901            ident = Word(alphas, alphanums)
902            num = Word(nums)
903            func = Forward()
904            term = ident | num | Group('(' + func + ')')
905            func <<= ident + Group(Optional(delimitedList(term)))
906            result = func.parseString("fna a,b,(fnb c,d,200),100")
907            result.pprint(width=40)
908        prints::
909            ['fna',
910             ['a',
911              'b',
912              ['(', 'fnb', ['c', 'd', '200'], ')'],
913              '100']]
914        """
915        pprint.pprint(self.asList(), *args, **kwargs)
916
917    # add support for pickle protocol
918    def __getstate__(self):
919        return ( self.__toklist,
920                 ( self.__tokdict.copy(),
921                   self.__parent is not None and self.__parent() or None,
922                   self.__accumNames,
923                   self.__name ) )
924
925    def __setstate__(self,state):
926        self.__toklist = state[0]
927        (self.__tokdict,
928         par,
929         inAccumNames,
930         self.__name) = state[1]
931        self.__accumNames = {}
932        self.__accumNames.update(inAccumNames)
933        if par is not None:
934            self.__parent = wkref(par)
935        else:
936            self.__parent = None
937
938    def __getnewargs__(self):
939        return self.__toklist, self.__name, self.__asList, self.__modal
940
941    def __dir__(self):
942        return (dir(type(self)) + list(self.keys()))
943
# Register ParseResults as a virtual MutableMapping subclass.
# The ABC aliases in the top-level ``collections`` module (deprecated since
# Python 3.3) were removed in Python 3.10, so look in ``collections.abc``
# first and fall back to the old location only for Python 2.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)
945
def col (loc,strg):
    """Return the 1-based column of position C{loc} within C{strg}, treating
    newlines as line separators.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>}
    for more information on parsing strings containing C{<TAB>}s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    # a position directly after a newline (and inside the string) is column 1
    if 0 < loc < len(strg) and strg[loc-1] == '\n':
        return 1
    # otherwise measure the distance back to the previous newline (-1 if none)
    return loc - strg.rfind("\n", 0, loc)
958
def lineno(loc,strg):
    """Return the 1-based line number of position C{loc} within C{strg},
    treating newlines as line separators.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>}
    for more information on parsing strings containing C{<TAB>}s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    newlinesBefore = strg.count("\n", 0, loc)
    return newlinesBefore + 1
970
def line( loc, strg ):
    """Return the line of text in C{strg} that contains position C{loc},
    without its terminating newline; newlines are the line separators.
    """
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # no newline at or after loc: the line runs to the end of the string
        return strg[start:]
    return strg[start:end]
980
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug action for a match attempt: print the expression, the
    location, and that location's (line, column)."""
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + position)
983
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug action for a successful match: print the expression and
    the matched tokens as a list."""
    matched = str(toks.asList())
    print ("Matched " + _ustr(expr) + " -> " + matched)
986
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug action for a failed match: print the exception."""
    description = _ustr(exc)
    print ("Exception raised:" + description)
989
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments, ignores them, and returns C{None}.
    """
    return None
993
994# Only works on Python 3.x - nonlocal is toxic to Python 2 installs
995#~ 'decorator to trim function calls to match the arity of the target'
996#~ def _trim_arity(func, maxargs=3):
997    #~ if func in singleArgBuiltins:
998        #~ return lambda s,l,t: func(t)
999    #~ limit = 0
1000    #~ foundArity = False
1001    #~ def wrapper(*args):
1002        #~ nonlocal limit,foundArity
1003        #~ while 1:
1004            #~ try:
1005                #~ ret = func(*args[limit:])
1006                #~ foundArity = True
1007                #~ return ret
1008            #~ except TypeError:
1009                #~ if limit == maxargs or foundArity:
1010                    #~ raise
1011                #~ limit += 1
1012                #~ continue
1013    #~ return wrapper
1014
1015# this version is Python 2.x-3.x cross-compatible
1016'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """
    Wrap parse action C{func} so it can be invoked with the full argument
    list while accepting fewer parameters.

    The returned wrapper calls C{func(*args[limit:])}, starting with all
    arguments and dropping one more leading argument each time the call
    itself raises C{TypeError}, until a call succeeds.  Once a call has
    succeeded the discovered offset is kept, and any later C{TypeError}
    is re-raised untouched.  Functions listed in C{singleArgBuiltins} are
    short-circuited to be called with the tokens only.

    Parameters:
     - func - the user's parse action
     - maxargs - highest leading-argument offset at which a retry is still
       attempted; beyond it the C{TypeError} is re-raised

    Returns the wrapping callable, named after C{func} for debug output.

    WARNING: this function is sensitive to its own line layout - see the
    C{LINE_DIFF} comment below before adding or removing any line.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists act as mutable closure state (no 'nonlocal' on Python 2)
    limit = [0]
    foundArity = [False]

    # helpers that reduce traceback entries to plain (filename, lineno) tuples
    def extract_stack(limit=0):
        offset = -2
        frame_summary = traceback.extract_stack(limit=-offset+limit-1)[offset]
        return [(frame_summary.filename, frame_summary.lineno)]
    def extract_tb(tb, limit=0):
        frames = traceback.extract_tb(tb, limit=limit)
        frame_summary = frames[-1]
        return [(frame_summary.filename, frame_summary.lineno)]

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # compare the deepest of the first two traceback entries with the
                        # synthesized location of the func(...) call above; a mismatch
                        # means the error was raised somewhere else, so propagate it
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the traceback reference explicitly
                        del tb

                # arity mismatch: retry with one fewer leading argument, if allowed
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
1074
1075class ParserElement(object):
1076    """Abstract base level parser element class."""
1077    DEFAULT_WHITE_CHARS = " \n\t\r"
1078    verbose_stacktrace = False
1079
1080    @staticmethod
1081    def setDefaultWhitespaceChars( chars ):
1082        r"""
1083        Overrides the default whitespace chars
1084
1085        Example::
1086            # default whitespace chars are space, <TAB> and newline
1087            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']
1088
1089            # change to just treat newline as significant
1090            ParserElement.setDefaultWhitespaceChars(" \t")
1091            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
1092        """
1093        ParserElement.DEFAULT_WHITE_CHARS = chars
1094
1095    @staticmethod
1096    def inlineLiteralsUsing(cls):
1097        """
1098        Set class to be used for inclusion of string literals into a parser.
1099
1100        Example::
1101            # default literal class used is Literal
1102            integer = Word(nums)
1103            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1104
1105            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
1106
1107
1108            # change to Suppress
1109            ParserElement.inlineLiteralsUsing(Suppress)
1110            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1111
1112            date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
1113        """
1114        ParserElement._literalStringClass = cls
1115
1116    def __init__( self, savelist=False ):
1117        self.parseAction = list()
1118        self.failAction = None
1119        #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
1120        self.strRepr = None
1121        self.resultsName = None
1122        self.saveAsList = savelist
1123        self.skipWhitespace = True
1124        self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1125        self.copyDefaultWhiteChars = True
1126        self.mayReturnEmpty = False # used when checking for left-recursion
1127        self.keepTabs = False
1128        self.ignoreExprs = list()
1129        self.debug = False
1130        self.streamlined = False
1131        self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
1132        self.errmsg = ""
1133        self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
1134        self.debugActions = ( None, None, None ) #custom debug actions
1135        self.re = None
1136        self.callPreparse = True # used to avoid redundant calls to preParse
1137        self.callDuringTry = False
1138
1139    def copy( self ):
1140        """
1141        Make a copy of this C{ParserElement}.  Useful for defining different parse actions
1142        for the same parsing pattern, using copies of the original parse element.
1143
1144        Example::
1145            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1146            integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
1147            integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
1148
1149            print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
1150        prints::
1151            [5120, 100, 655360, 268435456]
1152        Equivalent form of C{expr.copy()} is just C{expr()}::
1153            integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
1154        """
1155        cpy = copy.copy( self )
1156        cpy.parseAction = self.parseAction[:]
1157        cpy.ignoreExprs = self.ignoreExprs[:]
1158        if self.copyDefaultWhiteChars:
1159            cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1160        return cpy
1161
1162    def setName( self, name ):
1163        """
1164        Define name for this expression, makes debugging and exception messages clearer.
1165
1166        Example::
1167            Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
1168            Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
1169        """
1170        self.name = name
1171        self.errmsg = "Expected " + self.name
1172        if hasattr(self,"exception"):
1173            self.exception.msg = self.errmsg
1174        return self
1175
1176    def setResultsName( self, name, listAllMatches=False ):
1177        """
1178        Define name for referencing matching tokens as a nested attribute
1179        of the returned parse results.
1180        NOTE: this returns a *copy* of the original C{ParserElement} object;
1181        this is so that the client can define a basic element, such as an
1182        integer, and reference it in multiple places with different names.
1183
1184        You can also set results names using the abbreviated syntax,
1185        C{expr("name")} in place of C{expr.setResultsName("name")} -
1186        see L{I{__call__}<__call__>}.
1187
1188        Example::
1189            date_str = (integer.setResultsName("year") + '/'
1190                        + integer.setResultsName("month") + '/'
1191                        + integer.setResultsName("day"))
1192
1193            # equivalent form:
1194            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1195        """
1196        newself = self.copy()
1197        if name.endswith("*"):
1198            name = name[:-1]
1199            listAllMatches=True
1200        newself.resultsName = name
1201        newself.modalResults = not listAllMatches
1202        return newself
1203
1204    def setBreak(self,breakFlag = True):
1205        """Method to invoke the Python pdb debugger when this element is
1206           about to be parsed. Set C{breakFlag} to True to enable, False to
1207           disable.
1208        """
1209        if breakFlag:
1210            _parseMethod = self._parse
1211            def breaker(instring, loc, doActions=True, callPreParse=True):
1212                import pdb
1213                pdb.set_trace()
1214                return _parseMethod( instring, loc, doActions, callPreParse )
1215            breaker._originalParseMethod = _parseMethod
1216            self._parse = breaker
1217        else:
1218            if hasattr(self._parse,"_originalParseMethod"):
1219                self._parse = self._parse._originalParseMethod
1220        return self
1221
1222    def setParseAction( self, *fns, **kwargs ):
1223        """
1224        Define one or more actions to perform when successfully matching parse element definition.
1225        Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
1226        C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
1227         - s   = the original string being parsed (see note below)
1228         - loc = the location of the matching substring
1229         - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
1230        If the functions in fns modify the tokens, they can return them as the return
1231        value from fn, and the modified list of tokens will replace the original.
1232        Otherwise, fn does not need to return any value.
1233
1234        Optional keyword arguments:
1235         - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing
1236
1237        Note: the default parsing behavior is to expand tabs in the input string
1238        before starting the parsing process.  See L{I{parseString}<parseString>} for more information
1239        on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
1240        consistent view of the parsed string, the parse location, and line and column
1241        positions within the parsed string.
1242
1243        Example::
1244            integer = Word(nums)
1245            date_str = integer + '/' + integer + '/' + integer
1246
1247            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']
1248
1249            # use parse action to convert to ints at parse time
1250            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1251            date_str = integer + '/' + integer + '/' + integer
1252
1253            # note that integer fields are now ints, not strings
1254            date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
1255        """
1256        self.parseAction = list(map(_trim_arity, list(fns)))
1257        self.callDuringTry = kwargs.get("callDuringTry", False)
1258        return self
1259
1260    def addParseAction( self, *fns, **kwargs ):
1261        """
1262        Add one or more parse actions to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.
1263
1264        See examples in L{I{copy}<copy>}.
1265        """
1266        self.parseAction += list(map(_trim_arity, list(fns)))
1267        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
1268        return self
1269
1270    def addCondition(self, *fns, **kwargs):
1271        """Add a boolean predicate function to expression's list of parse actions. See
1272        L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
1273        functions passed to C{addCondition} need to return boolean success/fail of the condition.
1274
1275        Optional keyword arguments:
1276         - message = define a custom message to be used in the raised exception
1277         - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
1278
1279        Example::
1280            integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1281            year_int = integer.copy()
1282            year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
1283            date_str = year_int + '/' + integer + '/' + integer
1284
1285            result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
1286        """
1287        msg = kwargs.get("message", "failed user-defined condition")
1288        exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException
1289        for fn in fns:
1290            def pa(s,l,t):
1291                if not bool(_trim_arity(fn)(s,l,t)):
1292                    raise exc_type(s,l,msg)
1293            self.parseAction.append(pa)
1294        self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
1295        return self
1296
1297    def setFailAction( self, fn ):
1298        """Define action to perform if parsing fails at this expression.
1299           Fail action fn is a callable function that takes the arguments
1300           C{fn(s,loc,expr,err)} where:
1301            - s = string being parsed
1302            - loc = location where expression match was attempted and failed
1303            - expr = the parse expression that failed
1304            - err = the exception thrown
1305           The function returns no value.  It may throw C{L{ParseFatalException}}
1306           if it is desired to stop parsing immediately."""
1307        self.failAction = fn
1308        return self
1309
1310    def _skipIgnorables( self, instring, loc ):
1311        exprsFound = True
1312        while exprsFound:
1313            exprsFound = False
1314            for e in self.ignoreExprs:
1315                try:
1316                    while 1:
1317                        loc,dummy = e._parse( instring, loc )
1318                        exprsFound = True
1319                except ParseException:
1320                    pass
1321        return loc
1322
1323    def preParse( self, instring, loc ):
1324        if self.ignoreExprs:
1325            loc = self._skipIgnorables( instring, loc )
1326
1327        if self.skipWhitespace:
1328            wt = self.whiteChars
1329            instrlen = len(instring)
1330            while loc < instrlen and instring[loc] in wt:
1331                loc += 1
1332
1333        return loc
1334
1335    def parseImpl( self, instring, loc, doActions=True ):
1336        return loc, []
1337
1338    def postParse( self, instring, loc, tokenlist ):
1339        return tokenlist
1340
1341    #~ @profile
1342    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
1343        debugging = ( self.debug ) #and doActions )
1344
1345        if debugging or self.failAction:
1346            #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
1347            if (self.debugActions[0] ):
1348                self.debugActions[0]( instring, loc, self )
1349            if callPreParse and self.callPreparse:
1350                preloc = self.preParse( instring, loc )
1351            else:
1352                preloc = loc
1353            tokensStart = preloc
1354            try:
1355                try:
1356                    loc,tokens = self.parseImpl( instring, preloc, doActions )
1357                except IndexError:
1358                    raise ParseException( instring, len(instring), self.errmsg, self )
1359            except ParseBaseException as err:
1360                #~ print ("Exception raised:", err)
1361                if self.debugActions[2]:
1362                    self.debugActions[2]( instring, tokensStart, self, err )
1363                if self.failAction:
1364                    self.failAction( instring, tokensStart, self, err )
1365                raise
1366        else:
1367            if callPreParse and self.callPreparse:
1368                preloc = self.preParse( instring, loc )
1369            else:
1370                preloc = loc
1371            tokensStart = preloc
1372            if self.mayIndexError or loc >= len(instring):
1373                try:
1374                    loc,tokens = self.parseImpl( instring, preloc, doActions )
1375                except IndexError:
1376                    raise ParseException( instring, len(instring), self.errmsg, self )
1377            else:
1378                loc,tokens = self.parseImpl( instring, preloc, doActions )
1379
1380        tokens = self.postParse( instring, loc, tokens )
1381
1382        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
1383        if self.parseAction and (doActions or self.callDuringTry):
1384            if debugging:
1385                try:
1386                    for fn in self.parseAction:
1387                        tokens = fn( instring, tokensStart, retTokens )
1388                        if tokens is not None:
1389                            retTokens = ParseResults( tokens,
1390                                                      self.resultsName,
1391                                                      asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
1392                                                      modal=self.modalResults )
1393                except ParseBaseException as err:
1394                    #~ print "Exception raised in user parse action:", err
1395                    if (self.debugActions[2] ):
1396                        self.debugActions[2]( instring, tokensStart, self, err )
1397                    raise
1398            else:
1399                for fn in self.parseAction:
1400                    tokens = fn( instring, tokensStart, retTokens )
1401                    if tokens is not None:
1402                        retTokens = ParseResults( tokens,
1403                                                  self.resultsName,
1404                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
1405                                                  modal=self.modalResults )
1406
1407        if debugging:
1408            #~ print ("Matched",self,"->",retTokens.asList())
1409            if (self.debugActions[1] ):
1410                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )
1411
1412        return loc, retTokens
1413
1414    def tryParse( self, instring, loc ):
1415        try:
1416            return self._parse( instring, loc, doActions=False )[0]
1417        except ParseFatalException:
1418            raise ParseException( instring, loc, self.errmsg, self)
1419
1420    def canParseNext(self, instring, loc):
1421        try:
1422            self.tryParse(instring, loc)
1423        except (ParseException, IndexError):
1424            return False
1425        else:
1426            return True
1427
1428    class _UnboundedCache(object):
1429        def __init__(self):
1430            cache = {}
1431            self.not_in_cache = not_in_cache = object()
1432
1433            def get(self, key):
1434                return cache.get(key, not_in_cache)
1435
1436            def set(self, key, value):
1437                cache[key] = value
1438
1439            def clear(self):
1440                cache.clear()
1441
1442            def cache_len(self):
1443                return len(cache)
1444
1445            self.get = types.MethodType(get, self)
1446            self.set = types.MethodType(set, self)
1447            self.clear = types.MethodType(clear, self)
1448            self.__len__ = types.MethodType(cache_len, self)
1449
1450    if _OrderedDict is not None:
1451        class _FifoCache(object):
1452            def __init__(self, size):
1453                self.not_in_cache = not_in_cache = object()
1454
1455                cache = _OrderedDict()
1456
1457                def get(self, key):
1458                    return cache.get(key, not_in_cache)
1459
1460                def set(self, key, value):
1461                    cache[key] = value
1462                    while len(cache) > size:
1463                        try:
1464                            cache.popitem(False)
1465                        except KeyError:
1466                            pass
1467
1468                def clear(self):
1469                    cache.clear()
1470
1471                def cache_len(self):
1472                    return len(cache)
1473
1474                self.get = types.MethodType(get, self)
1475                self.set = types.MethodType(set, self)
1476                self.clear = types.MethodType(clear, self)
1477                self.__len__ = types.MethodType(cache_len, self)
1478
1479    else:
1480        class _FifoCache(object):
1481            def __init__(self, size):
1482                self.not_in_cache = not_in_cache = object()
1483
1484                cache = {}
1485                key_fifo = collections.deque([], size)
1486
1487                def get(self, key):
1488                    return cache.get(key, not_in_cache)
1489
1490                def set(self, key, value):
1491                    cache[key] = value
1492                    while len(key_fifo) > size:
1493                        cache.pop(key_fifo.popleft(), None)
1494                    key_fifo.append(key)
1495
1496                def clear(self):
1497                    cache.clear()
1498                    key_fifo.clear()
1499
1500                def cache_len(self):
1501                    return len(cache)
1502
1503                self.get = types.MethodType(get, self)
1504                self.set = types.MethodType(set, self)
1505                self.clear = types.MethodType(clear, self)
1506                self.__len__ = types.MethodType(cache_len, self)
1507
    # argument cache for optimizing repeated calls when backtracking through recursive expressions
    packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
    # lock held by _parseCache for the whole lookup/parse/store sequence
    packrat_cache_lock = RLock()
    # [hits, misses] counters, indexed by the HIT and MISS constants in _parseCache
    packrat_cache_stats = [0, 0]
1512
1513    # this method gets repeatedly called during backtracking with the same arguments -
1514    # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
1515    def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
1516        HIT, MISS = 0, 1
1517        lookup = (self, instring, loc, callPreParse, doActions)
1518        with ParserElement.packrat_cache_lock:
1519            cache = ParserElement.packrat_cache
1520            value = cache.get(lookup)
1521            if value is cache.not_in_cache:
1522                ParserElement.packrat_cache_stats[MISS] += 1
1523                try:
1524                    value = self._parseNoCache(instring, loc, doActions, callPreParse)
1525                except ParseBaseException as pe:
1526                    # cache a copy of the exception, without the traceback
1527                    cache.set(lookup, pe.__class__(*pe.args))
1528                    raise
1529                else:
1530                    cache.set(lookup, (value[0], value[1].copy()))
1531                    return value
1532            else:
1533                ParserElement.packrat_cache_stats[HIT] += 1
1534                if isinstance(value, Exception):
1535                    raise value
1536                return (value[0], value[1].copy())
1537
    # default parse entry point is the uncached implementation; enablePackrat()
    # rebinds ParserElement._parse to _parseCache
    _parse = _parseNoCache
1539
1540    @staticmethod
1541    def resetCache():
1542        ParserElement.packrat_cache.clear()
1543        ParserElement.packrat_cache_stats[:] = [0] * len(ParserElement.packrat_cache_stats)
1544
    # set to True by the first enablePackrat() call; later calls then do nothing
    _packratEnabled = False
1546    @staticmethod
1547    def enablePackrat(cache_size_limit=128):
1548        """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1549           Repeated parse attempts at the same string location (which happens
1550           often in many complex grammars) can immediately return a cached value,
1551           instead of re-executing parsing/validating code.  Memoizing is done of
1552           both valid results and parsing exceptions.
1553
1554           Parameters:
1555            - cache_size_limit - (default=C{128}) - if an integer value is provided
1556              will limit the size of the packrat cache; if None is passed, then
1557              the cache size will be unbounded; if 0 is passed, the cache will
1558              be effectively disabled.
1559
1560           This speedup may break existing programs that use parse actions that
1561           have side-effects.  For this reason, packrat parsing is disabled when
1562           you first import pyparsing.  To activate the packrat feature, your
1563           program must call the class method C{ParserElement.enablePackrat()}.  If
1564           your program uses C{psyco} to "compile as you go", you must call
1565           C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
1566           Python will crash.  For best results, call C{enablePackrat()} immediately
1567           after importing pyparsing.
1568
1569           Example::
1570               import pyparsing
1571               pyparsing.ParserElement.enablePackrat()
1572        """
1573        if not ParserElement._packratEnabled:
1574            ParserElement._packratEnabled = True
1575            if cache_size_limit is None:
1576                ParserElement.packrat_cache = ParserElement._UnboundedCache()
1577            else:
1578                ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
1579            ParserElement._parse = ParserElement._parseCache
1580
1581    def parseString( self, instring, parseAll=False ):
1582        """
1583        Execute the parse expression with the given string.
1584        This is the main interface to the client code, once the complete
1585        expression has been built.
1586
1587        If you want the grammar to require that the entire input string be
1588        successfully parsed, then set C{parseAll} to True (equivalent to ending
1589        the grammar with C{L{StringEnd()}}).
1590
1591        Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
1592        in order to report proper column numbers in parse actions.
1593        If the input string contains tabs and
1594        the grammar uses parse actions that use the C{loc} argument to index into the
1595        string being parsed, you can ensure you have a consistent view of the input
1596        string by:
1597         - calling C{parseWithTabs} on your grammar before calling C{parseString}
1598           (see L{I{parseWithTabs}<parseWithTabs>})
1599         - define your parse action using the full C{(s,loc,toks)} signature, and
1600           reference the input string using the parse action's C{s} argument
1601         - explicitly expand the tabs in your input string before calling
1602           C{parseString}
1603
1604        Example::
1605            Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
1606            Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
1607        """
1608        ParserElement.resetCache()
1609        if not self.streamlined:
1610            self.streamline()
1611            #~ self.saveAsList = True
1612        for e in self.ignoreExprs:
1613            e.streamline()
1614        if not self.keepTabs:
1615            instring = instring.expandtabs()
1616        try:
1617            loc, tokens = self._parse( instring, 0 )
1618            if parseAll:
1619                loc = self.preParse( instring, loc )
1620                se = Empty() + StringEnd()
1621                se._parse( instring, loc )
1622        except ParseBaseException as exc:
1623            if ParserElement.verbose_stacktrace:
1624                raise
1625            else:
1626                # catch and re-raise exception from here, clears out pyparsing internal stack trace
1627                raise exc
1628        else:
1629            return tokens
1630
    def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
        """
        Scan the input string for expression matches.  Each match will return the
        matching tokens, start location, and end location.  May be called with optional
        C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
        C{overlap} is specified, then overlapping matches will be reported.

        This is a generator: each match is yielded as a C{(tokens, start, end)} tuple,
        and no scanning happens until the result is iterated.

        Note that the start and end locations are reported relative to the string
        being parsed.  See L{I{parseString}<parseString>} for more information on parsing
        strings with embedded tabs.

        Example::
            source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
            print(source)
            for tokens,start,end in Word(alphas).scanString(source):
                print(' '*start + '^'*(end-start))
                print(' '*start + tokens[0])

        prints::

            sldjf123lsdjjkf345sldkjf879lkjsfd987
            ^^^^^
            sldjf
                    ^^^^^^^
                    lsdjjkf
                              ^^^^^^
                              sldkjf
                                       ^^^^^^
                                       lkjsfd
        """
        if not self.streamlined:
            self.streamline()
        for e in self.ignoreExprs:
            e.streamline()

        if not self.keepTabs:
            instring = _ustr(instring).expandtabs()
        instrlen = len(instring)
        loc = 0
        # bind the bound methods to locals once; they are called on every scan step
        preparseFn = self.preParse
        parseFn = self._parse
        ParserElement.resetCache()
        matches = 0
        try:
            # loc == instrlen is still tried, so a match attempt is made at end of input
            while loc <= instrlen and matches < maxMatches:
                try:
                    preloc = preparseFn( instring, loc )
                    # pre-parsing was just done by hand, so tell _parse not to repeat it
                    nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
                except ParseException:
                    # no match here - retry one character further on
                    loc = preloc+1
                else:
                    if nextLoc > loc:
                        matches += 1
                        yield tokens, preloc, nextLoc
                        if overlap:
                            # NOTE(review): `nextloc` (lowercase) is a separate local from
                            # `nextLoc`; as written, scanning resumes at the end of the match
                            # when pre-parsing moves past loc, and otherwise advances by a
                            # single character - confirm this is the intended overlap rule
                            nextloc = preparseFn( instring, loc )
                            if nextloc > loc:
                                loc = nextLoc
                            else:
                                loc += 1
                        else:
                            loc = nextLoc
                    else:
                        # the match did not end beyond loc; step forward so the loop progresses
                        loc = preloc+1
        except ParseBaseException as exc:
            if ParserElement.verbose_stacktrace:
                raise
            else:
                # catch and re-raise exception from here, clears out pyparsing internal stack trace
                raise exc
1701
1702    def transformString( self, instring ):
1703        """
1704        Extension to C{L{scanString}}, to modify matching text with modified tokens that may
1705        be returned from a parse action.  To use C{transformString}, define a grammar and
1706        attach a parse action to it that modifies the returned token list.
1707        Invoking C{transformString()} on a target string will then scan for matches,
1708        and replace the matched text patterns according to the logic in the parse
1709        action.  C{transformString()} returns the resulting transformed string.
1710
1711        Example::
1712            wd = Word(alphas)
1713            wd.setParseAction(lambda toks: toks[0].title())
1714
1715            print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
1716        Prints::
1717            Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
1718        """
1719        out = []
1720        lastE = 0
1721        # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
1722        # keep string locs straight between transformString and scanString
1723        self.keepTabs = True
1724        try:
1725            for t,s,e in self.scanString( instring ):
1726                out.append( instring[lastE:s] )
1727                if t:
1728                    if isinstance(t,ParseResults):
1729                        out += t.asList()
1730                    elif isinstance(t,list):
1731                        out += t
1732                    else:
1733                        out.append(t)
1734                lastE = e
1735            out.append(instring[lastE:])
1736            out = [o for o in out if o]
1737            return "".join(map(_ustr,_flatten(out)))
1738        except ParseBaseException as exc:
1739            if ParserElement.verbose_stacktrace:
1740                raise
1741            else:
1742                # catch and re-raise exception from here, clears out pyparsing internal stack trace
1743                raise exc
1744
1745    def searchString( self, instring, maxMatches=_MAX_INT ):
1746        """
1747        Another extension to C{L{scanString}}, simplifying the access to the tokens found
1748        to match the given parse expression.  May be called with optional
1749        C{maxMatches} argument, to clip searching after 'n' matches are found.
1750
1751        Example::
1752            # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
1753            cap_word = Word(alphas.upper(), alphas.lower())
1754
1755            print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
1756
1757            # the sum() builtin can be used to merge results into a single ParseResults object
1758            print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))
1759        prints::
1760            [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
1761            ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
1762        """
1763        try:
1764            return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1765        except ParseBaseException as exc:
1766            if ParserElement.verbose_stacktrace:
1767                raise
1768            else:
1769                # catch and re-raise exception from here, clears out pyparsing internal stack trace
1770                raise exc
1771
1772    def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
1773        """
1774        Generator method to split a string using the given expression as a separator.
1775        May be called with optional C{maxsplit} argument, to limit the number of splits;
1776        and the optional C{includeSeparators} argument (default=C{False}), if the separating
1777        matching text should be included in the split results.
1778
1779        Example::
1780            punc = oneOf(list(".,;:/-!?"))
1781            print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
1782        prints::
1783            ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
1784        """
1785        splits = 0
1786        last = 0
1787        for t,s,e in self.scanString(instring, maxMatches=maxsplit):
1788            yield instring[last:s]
1789            if includeSeparators:
1790                yield t[0]
1791            last = e
1792        yield instring[last:]
1793
1794    def __add__(self, other ):
1795        """
1796        Implementation of + operator - returns C{L{And}}. Adding strings to a ParserElement
1797        converts them to L{Literal}s by default.
1798
1799        Example::
1800            greet = Word(alphas) + "," + Word(alphas) + "!"
1801            hello = "Hello, World!"
1802            print (hello, "->", greet.parseString(hello))
1803        Prints::
1804            Hello, World! -> ['Hello', ',', 'World', '!']
1805        """
1806        if isinstance( other, basestring ):
1807            other = ParserElement._literalStringClass( other )
1808        if not isinstance( other, ParserElement ):
1809            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1810                    SyntaxWarning, stacklevel=2)
1811            return None
1812        return And( [ self, other ] )
1813
1814    def __radd__(self, other ):
1815        """
1816        Implementation of + operator when left operand is not a C{L{ParserElement}}
1817        """
1818        if isinstance( other, basestring ):
1819            other = ParserElement._literalStringClass( other )
1820        if not isinstance( other, ParserElement ):
1821            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1822                    SyntaxWarning, stacklevel=2)
1823            return None
1824        return other + self
1825
1826    def __sub__(self, other):
1827        """
1828        Implementation of - operator, returns C{L{And}} with error stop
1829        """
1830        if isinstance( other, basestring ):
1831            other = ParserElement._literalStringClass( other )
1832        if not isinstance( other, ParserElement ):
1833            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1834                    SyntaxWarning, stacklevel=2)
1835            return None
1836        return self + And._ErrorStop() + other
1837
1838    def __rsub__(self, other ):
1839        """
1840        Implementation of - operator when left operand is not a C{L{ParserElement}}
1841        """
1842        if isinstance( other, basestring ):
1843            other = ParserElement._literalStringClass( other )
1844        if not isinstance( other, ParserElement ):
1845            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1846                    SyntaxWarning, stacklevel=2)
1847            return None
1848        return other - self
1849
1850    def __mul__(self,other):
1851        """
1852        Implementation of * operator, allows use of C{expr * 3} in place of
1853        C{expr + expr + expr}.  Expressions may also me multiplied by a 2-integer
1854        tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
1855        may also include C{None} as in:
1856         - C{expr*(n,None)} or C{expr*(n,)} is equivalent
1857              to C{expr*n + L{ZeroOrMore}(expr)}
1858              (read as "at least n instances of C{expr}")
1859         - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
1860              (read as "0 to n instances of C{expr}")
1861         - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
1862         - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
1863
1864        Note that C{expr*(None,n)} does not raise an exception if
1865        more than n exprs exist in the input stream; that is,
1866        C{expr*(None,n)} does not enforce a maximum number of expr
1867        occurrences.  If this behavior is desired, then write
1868        C{expr*(None,n) + ~expr}
1869        """
1870        if isinstance(other,int):
1871            minElements, optElements = other,0
1872        elif isinstance(other,tuple):
1873            other = (other + (None, None))[:2]
1874            if other[0] is None:
1875                other = (0, other[1])
1876            if isinstance(other[0],int) and other[1] is None:
1877                if other[0] == 0:
1878                    return ZeroOrMore(self)
1879                if other[0] == 1:
1880                    return OneOrMore(self)
1881                else:
1882                    return self*other[0] + ZeroOrMore(self)
1883            elif isinstance(other[0],int) and isinstance(other[1],int):
1884                minElements, optElements = other
1885                optElements -= minElements
1886            else:
1887                raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1888        else:
1889            raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1890
1891        if minElements < 0:
1892            raise ValueError("cannot multiply ParserElement by negative value")
1893        if optElements < 0:
1894            raise ValueError("second tuple value must be greater or equal to first tuple value")
1895        if minElements == optElements == 0:
1896            raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1897
1898        if (optElements):
1899            def makeOptionalList(n):
1900                if n>1:
1901                    return Optional(self + makeOptionalList(n-1))
1902                else:
1903                    return Optional(self)
1904            if minElements:
1905                if minElements == 1:
1906                    ret = self + makeOptionalList(optElements)
1907                else:
1908                    ret = And([self]*minElements) + makeOptionalList(optElements)
1909            else:
1910                ret = makeOptionalList(optElements)
1911        else:
1912            if minElements == 1:
1913                ret = self
1914            else:
1915                ret = And([self]*minElements)
1916        return ret
1917
    def __rmul__(self, other):
        """
        Implementation of * operator when left operand is not a C{L{ParserElement}};
        delegates to C{L{__mul__}}, so C{3 * expr} gives the same result as C{expr * 3}.
        """
        return self.__mul__(other)
1920
1921    def __or__(self, other ):
1922        """
1923        Implementation of | operator - returns C{L{MatchFirst}}
1924        """
1925        if isinstance( other, basestring ):
1926            other = ParserElement._literalStringClass( other )
1927        if not isinstance( other, ParserElement ):
1928            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1929                    SyntaxWarning, stacklevel=2)
1930            return None
1931        return MatchFirst( [ self, other ] )
1932
1933    def __ror__(self, other ):
1934        """
1935        Implementation of | operator when left operand is not a C{L{ParserElement}}
1936        """
1937        if isinstance( other, basestring ):
1938            other = ParserElement._literalStringClass( other )
1939        if not isinstance( other, ParserElement ):
1940            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1941                    SyntaxWarning, stacklevel=2)
1942            return None
1943        return other | self
1944
1945    def __xor__(self, other ):
1946        """
1947        Implementation of ^ operator - returns C{L{Or}}
1948        """
1949        if isinstance( other, basestring ):
1950            other = ParserElement._literalStringClass( other )
1951        if not isinstance( other, ParserElement ):
1952            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1953                    SyntaxWarning, stacklevel=2)
1954            return None
1955        return Or( [ self, other ] )
1956
1957    def __rxor__(self, other ):
1958        """
1959        Implementation of ^ operator when left operand is not a C{L{ParserElement}}
1960        """
1961        if isinstance( other, basestring ):
1962            other = ParserElement._literalStringClass( other )
1963        if not isinstance( other, ParserElement ):
1964            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1965                    SyntaxWarning, stacklevel=2)
1966            return None
1967        return other ^ self
1968
1969    def __and__(self, other ):
1970        """
1971        Implementation of & operator - returns C{L{Each}}
1972        """
1973        if isinstance( other, basestring ):
1974            other = ParserElement._literalStringClass( other )
1975        if not isinstance( other, ParserElement ):
1976            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1977                    SyntaxWarning, stacklevel=2)
1978            return None
1979        return Each( [ self, other ] )
1980
1981    def __rand__(self, other ):
1982        """
1983        Implementation of & operator when left operand is not a C{L{ParserElement}}
1984        """
1985        if isinstance( other, basestring ):
1986            other = ParserElement._literalStringClass( other )
1987        if not isinstance( other, ParserElement ):
1988            warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1989                    SyntaxWarning, stacklevel=2)
1990            return None
1991        return other & self
1992
    def __invert__( self ):
        """
        Implementation of ~ operator - returns a C{L{NotAny}} wrapping this expression
        """
        return NotAny( self )
1998
1999    def __call__(self, name=None):
2000        """
2001        Shortcut for C{L{setResultsName}}, with C{listAllMatches=False}.
2002
2003        If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
2004        passed as C{True}.
2005
2006        If C{name} is omitted, same as calling C{L{copy}}.
2007
2008        Example::
2009            # these are equivalent
2010            userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
2011            userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
2012        """
2013        if name is not None:
2014            return self.setResultsName(name)
2015        else:
2016            return self.copy()
2017
    def suppress( self ):
        """
        Suppresses the output of this C{ParserElement}; useful to keep punctuation from
        cluttering up returned output.

        Returns a new C{L{Suppress}} expression wrapping this one.
        """
        return Suppress( self )
2024
    def leaveWhitespace( self ):
        """
        Disables the skipping of whitespace before matching the characters in the
        C{ParserElement}'s defined pattern.  This is normally only used internally by
        the pyparsing module, but may be needed in some whitespace-sensitive grammars.

        Modifies this expression in place (sets C{skipWhitespace} to False) and
        returns C{self}, so calls can be chained.
        """
        self.skipWhitespace = False
        return self
2033
    def setWhitespaceChars( self, chars ):
        """
        Overrides the default whitespace chars for this expression.

        Turns whitespace skipping on, stores C{chars} as the expression's
        C{whiteChars}, and clears the C{copyDefaultWhiteChars} flag.  Modifies this
        expression in place and returns C{self}, so calls can be chained.
        """
        self.skipWhitespace = True
        self.whiteChars = chars
        self.copyDefaultWhiteChars = False
        return self
2042
    def parseWithTabs( self ):
        """
        Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
        Must be called before C{parseString} when the input grammar contains elements that
        match C{<TAB>} characters.

        Sets C{keepTabs} to True on this expression and returns C{self}, so calls
        can be chained.
        """
        self.keepTabs = True
        return self
2051
2052    def ignore( self, other ):
2053        """
2054        Define expression to be ignored (e.g., comments) while doing pattern
2055        matching; may be called repeatedly, to define multiple comment or other
2056        ignorable patterns.
2057
2058        Example::
2059            patt = OneOrMore(Word(alphas))
2060            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
2061
2062            patt.ignore(cStyleComment)
2063            patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
2064        """
2065        if isinstance(other, basestring):
2066            other = Suppress(other)
2067
2068        if isinstance( other, Suppress ):
2069            if other not in self.ignoreExprs:
2070                self.ignoreExprs.append(other)
2071        else:
2072            self.ignoreExprs.append( Suppress( other.copy() ) )
2073        return self
2074
2075    def setDebugActions( self, startAction, successAction, exceptionAction ):
2076        """
2077        Enable display of debugging messages while doing pattern matching.
2078        """
2079        self.debugActions = (startAction or _defaultStartDebugAction,
2080                             successAction or _defaultSuccessDebugAction,
2081                             exceptionAction or _defaultExceptionDebugAction)
2082        self.debug = True
2083        return self
2084
2085    def setDebug( self, flag=True ):
2086        """
2087        Enable display of debugging messages while doing pattern matching.
2088        Set C{flag} to True to enable, False to disable.
2089
2090        Example::
2091            wd = Word(alphas).setName("alphaword")
2092            integer = Word(nums).setName("numword")
2093            term = wd | integer
2094
2095            # turn on debugging for wd
2096            wd.setDebug()
2097
2098            OneOrMore(term).parseString("abc 123 xyz 890")
2099
2100        prints::
2101            Match alphaword at loc 0(1,1)
2102            Matched alphaword -> ['abc']
2103            Match alphaword at loc 3(1,4)
2104            Exception raised:Expected alphaword (at char 4), (line:1, col:5)
2105            Match alphaword at loc 7(1,8)
2106            Matched alphaword -> ['xyz']
2107            Match alphaword at loc 11(1,12)
2108            Exception raised:Expected alphaword (at char 12), (line:1, col:13)
2109            Match alphaword at loc 15(1,16)
2110            Exception raised:Expected alphaword (at char 15), (line:1, col:16)
2111
2112        The output shown is that produced by the default debug actions - custom debug actions can be
2113        specified using L{setDebugActions}. Prior to attempting
2114        to match the C{wd} expression, the debugging message C{"Match <exprname> at loc <n>(<line>,<col>)"}
2115        is shown. Then if the parse succeeds, a C{"Matched"} message is shown, or an C{"Exception raised"}
2116        message is shown. Also note the use of L{setName} to assign a human-readable name to the expression,
2117        which makes debugging and exception messages easier to understand - for instance, the default
2118        name created for the C{Word} expression without calling C{setName} is C{"W:(ABCD...)"}.
2119        """
2120        if flag:
2121            self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
2122        else:
2123            self.debug = False
2124        return self
2125
    def __str__( self ):
        """Return this expression's C{name} attribute."""
        return self.name
2128
    def __repr__( self ):
        """Return the result of applying C{_ustr} to this expression."""
        return _ustr(self)
2131
    def streamline( self ):
        """
        Mark this expression as streamlined and reset C{strRepr} to C{None}.
        C{parseString} and C{scanString} call this before parsing when
        C{streamlined} is not yet set.  Returns C{self}.
        """
        self.streamlined = True
        self.strRepr = None
        return self
2136
    def checkRecursion( self, parseElementList ):
        """
        Recursion check hook invoked by C{L{validate}}; this implementation does nothing.
        """
        # presumably overridden by expression classes that contain other expressions -
        # confirm against the subclasses
        pass
2139
    def validate( self, validateTrace=[] ):
        """
        Check defined expressions for valid structure, check for infinite recursive definitions.

        This implementation only calls C{checkRecursion} with a fresh empty list.
        """
        # NOTE: validateTrace is not used in this method, so the shared mutable
        # default list is never modified here
        self.checkRecursion( [] )
2145
2146    def parseFile( self, file_or_filename, parseAll=False ):
2147        """
2148        Execute the parse expression on the given file or filename.
2149        If a filename is specified (instead of a file object),
2150        the entire file is opened, read, and closed before parsing.
2151        """
2152        try:
2153            file_contents = file_or_filename.read()
2154        except AttributeError:
2155            with open(file_or_filename, "r") as f:
2156                file_contents = f.read()
2157        try:
2158            return self.parseString(file_contents, parseAll)
2159        except ParseBaseException as exc:
2160            if ParserElement.verbose_stacktrace:
2161                raise
2162            else:
2163                # catch and re-raise exception from here, clears out pyparsing internal stack trace
2164                raise exc
2165
2166    def __eq__(self,other):
2167        if isinstance(other, ParserElement):
2168            return self is other or vars(self) == vars(other)
2169        elif isinstance(other, basestring):
2170            return self.matches(other)
2171        else:
2172            return super(ParserElement,self)==other
2173
    def __ne__(self,other):
        """Inverse of C{__eq__}: returns C{not (self == other)}."""
        return not (self == other)
2176
    def __hash__(self):
        """Hash on object identity (C{id(self)}), independent of the attribute-based C{__eq__}."""
        return hash(id(self))
2179
    def __req__(self,other):
        """Returns C{self == other}."""
        # NOTE(review): Python's data model defines no reflected "__req__" hook, so the
        # interpreter does not appear to call this implicitly - confirm before relying on it
        return self == other
2182
    def __rne__(self,other):
        """Returns C{not (self == other)}."""
        # NOTE(review): Python's data model defines no reflected "__rne__" hook, so the
        # interpreter does not appear to call this implicitly - confirm before relying on it
        return not (self == other)
2185
2186    def matches(self, testString, parseAll=True):
2187        """
2188        Method for quick testing of a parser against a test string. Good for simple
2189        inline microtests of sub expressions while building up larger parser.
2190
2191        Parameters:
2192         - testString - to test against this expression for a match
2193         - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
2194
2195        Example::
2196            expr = Word(nums)
2197            assert expr.matches("100")
2198        """
2199        try:
2200            self.parseString(_ustr(testString), parseAll=parseAll)
2201            return True
2202        except ParseBaseException:
2203            return False
2204
2205    def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
2206        """
2207        Execute the parse expression on a series of test strings, showing each
2208        test, the parsed results or where the parse failed. Quick and easy way to
2209        run a parse expression against a list of sample strings.
2210
2211        Parameters:
2212         - tests - a list of separate test strings, or a multiline string of test strings
2213         - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
2214         - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
2215              string; pass None to disable comment filtering
2216         - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
2217              if False, only dump nested list
2218         - printResults - (default=C{True}) prints test output to stdout
2219         - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing
2220
2221        Returns: a (success, results) tuple, where success indicates that all tests succeeded
2222        (or failed if C{failureTests} is True), and the results contain a list of lines of each
2223        test's output
2224
2225        Example::
2226            number_expr = pyparsing_common.number.copy()
2227
2228            result = number_expr.runTests('''
2229                # unsigned integer
2230                100
2231                # negative integer
2232                -100
2233                # float with scientific notation
2234                6.02e23
2235                # integer with scientific notation
2236                1e-12
2237                ''')
2238            print("Success" if result[0] else "Failed!")
2239
2240            result = number_expr.runTests('''
2241                # stray character
2242                100Z
2243                # missing leading digit before '.'
2244                -.100
2245                # too many '.'
2246                3.14.159
2247                ''', failureTests=True)
2248            print("Success" if result[0] else "Failed!")
2249        prints::
2250            # unsigned integer
2251            100
2252            [100]
2253
2254            # negative integer
2255            -100
2256            [-100]
2257
2258            # float with scientific notation
2259            6.02e23
2260            [6.02e+23]
2261
2262            # integer with scientific notation
2263            1e-12
2264            [1e-12]
2265
2266            Success
2267
2268            # stray character
2269            100Z
2270               ^
2271            FAIL: Expected end of text (at char 3), (line:1, col:4)
2272
2273            # missing leading digit before '.'
2274            -.100
2275            ^
2276            FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)
2277
2278            # too many '.'
2279            3.14.159
2280                ^
2281            FAIL: Expected end of text (at char 4), (line:1, col:5)
2282
2283            Success
2284
2285        Each test string must be on a single line. If you want to test a string that spans multiple
2286        lines, create a test like this::
2287
2288            expr.runTest(r"this is a test\\n of strings that spans \\n 3 lines")
2289
2290        (Note that this is a raw string literal, you must include the leading 'r'.)
2291        """
2292        if isinstance(tests, basestring):
2293            tests = list(map(str.strip, tests.rstrip().splitlines()))
2294        if isinstance(comment, basestring):
2295            comment = Literal(comment)
2296        allResults = []
2297        comments = []
2298        success = True
2299        for t in tests:
2300            if comment is not None and comment.matches(t, False) or comments and not t:
2301                comments.append(t)
2302                continue
2303            if not t:
2304                continue
2305            out = ['\n'.join(comments), t]
2306            comments = []
2307            try:
2308                t = t.replace(r'\n','\n')
2309                result = self.parseString(t, parseAll=parseAll)
2310                out.append(result.dump(full=fullDump))
2311                success = success and not failureTests
2312            except ParseBaseException as pe:
2313                fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
2314                if '\n' in t:
2315                    out.append(line(pe.loc, t))
2316                    out.append(' '*(col(pe.loc,t)-1) + '^' + fatal)
2317                else:
2318                    out.append(' '*pe.loc + '^' + fatal)
2319                out.append("FAIL: " + str(pe))
2320                success = success and failureTests
2321                result = pe
2322            except Exception as exc:
2323                out.append("FAIL-EXCEPTION: " + str(exc))
2324                success = success and failureTests
2325                result = exc
2326
2327            if printResults:
2328                if fullDump:
2329                    out.append('')
2330                print('\n'.join(out))
2331
2332            allResults.append((t, result))
2333
2334        return success, allResults
2335
2336
class Token(ParserElement):
    """
    Abstract C{ParserElement} subclass serving as the base for atomic matching patterns.
    """
    def __init__(self):
        # tokens always initialize the base element with savelist turned off
        super(Token, self).__init__(savelist=False)
2343
2344
class Empty(Token):
    """
    The empty token; it always matches.
    """
    def __init__(self):
        super(Empty, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
2354
2355
class NoMatch(Token):
    """
    A token that can never match; parsing it always raises C{ParseException}.
    """
    def __init__(self):
        super(NoMatch, self).__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "NoMatch"

    def parseImpl(self, instring, loc, doActions=True):
        # unconditional failure, reported at the current location
        raise ParseException(instring, loc, self.errmsg, self)
2369
2370
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::
        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use L{CaselessLiteral}.

    For keyword matching (force word break before and after the matched string),
    use L{Keyword} or L{CaselessKeyword}.

    Passing an empty string issues a C{SyntaxWarning} and turns the new object into
    an L{Empty} instance, which is flagged as able to return an empty match.
    """
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        self.mayReturnEmpty = False
        self.mayIndexError = False
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            # Morph into Empty; unlike a real literal, the object now matches
            # without consuming text, so it must advertise mayReturnEmpty
            # exactly as Empty.__init__ does.
            self.__class__ = Empty
            self.mayReturnEmpty = True
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match the literal text at C{loc}; return C{(new location, matched string)}
        or raise C{ParseException} if the text is not present there.
        """
        if (instring[loc] == self.firstMatchChar and
            (self.matchLen==1 or instring.startswith(self.match,loc)) ):
            return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)
# Short module-private alias for the Literal class.
_L = Literal
# Install Literal as ParserElement's literal-string class (presumably the class
# used when plain strings are auto-converted to expressions - confirm in ParserElement).
ParserElement._literalStringClass = Literal
2411
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is, it must not be
    immediately preceded or followed by a keyword (identifier) character.
    Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
          defaulting to all alphanumerics + "_" and "$"
     - C{caseless} allows case-insensitive matching, default is C{False}.

    Example::
        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use L{CaselessKeyword}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are done on upper-cased text, so the keyword
            # and the identifier characters are upper-cased once here
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match the keyword at C{loc}, requiring that the characters just before and
        just after it (when present) are not identifier characters. Returns
        C{(new location, keyword string)} or raises C{ParseException}.
        """
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """
        Return a copy of this keyword that keeps the same identifier characters.
        """
        c = super(Keyword,self).copy()
        # Give the copy its own set holding this keyword's identifier characters.
        # Resetting to DEFAULT_KEYWORD_CHARS here would discard any custom (or
        # upper-cased, for caseless keywords) characters and replace the set
        # with a plain string.
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
2476
class CaselessLiteral(Literal):
    """
    Token that matches a given string without regard to letter case.
    Note: the token returned is always the string as written in the
    constructor call, NOT the text as it appeared in the input.

    Example::
        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for L{CaselessKeyword}.)
    """
    def __init__(self, matchString):
        # the base Literal holds the upper-cased text used for comparisons
        super(CaselessLiteral, self).__init__(matchString.upper())
        # remember the literal exactly as it was defined, for results and messages
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        end = loc + self.matchLen
        candidate = instring[loc:end]
        if candidate.upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
2499
class CaselessKeyword(Keyword):
    """
    Caseless version of L{Keyword}.

    Example::
        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for L{CaselessLiteral}.)
    """
    def __init__( self, matchString, identChars=None ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match the keyword at C{loc} ignoring case. The keyword must not be directly
        preceded or followed by an identifier character, the same rule that
        C{Keyword(..., caseless=True)} applies. Returns C{(new location, keyword string)}
        or raises C{ParseException}.
        """
        end = loc + self.matchLen
        if ( (instring[ loc:end ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[end].upper() not in self.identChars) and
             # leading word-boundary check, so that e.g. the "CMD" inside "xCMD" is rejected
             (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
            return end, self.match
        raise ParseException(instring, loc, self.errmsg, self)
2517
class CloseMatch(Token):
    """
    A variation on L{Literal} which matches "close" matches, that is,
    strings with at most 'n' mismatching characters. C{CloseMatch} takes parameters:
     - C{match_string} - string to be matched
     - C{maxMismatches} - (C{default=1}) maximum number of mismatches allowed to count as a match

    The results from a successful parse will contain the matched text from the input string and the following named results:
     - C{mismatches} - a list of the positions within the match_string where mismatches were found
     - C{original} - the original match_string used to compare against the input string

    If C{mismatches} is an empty list, then the match was an exact match.

    Example::
        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA") # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA") # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA") # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA") # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """
    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch,self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Compare the input starting at C{loc} with C{match_string}, character by
        character. Returns C{(end location, results)} when no more than
        C{maxMismatches} positions differ; raises C{ParseException} when there are
        too many mismatches or not enough input left to compare.
        """
        start = loc
        maxloc = start + len(self.match_string)

        if maxloc <= len(instring):
            mismatches = []
            for match_stringloc, (src, mat) in enumerate(zip(instring[start:maxloc], self.match_string)):
                if src != mat:
                    mismatches.append(match_stringloc)
                    if len(mismatches) > self.maxMismatches:
                        break
            else:
                # The whole candidate was compared, so the match ends at maxloc.
                # The end must be measured from the start offset, not from 0,
                # otherwise matches found anywhere but the beginning of the input
                # report the wrong location and text.
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = self.match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)
2577
2578
class Word(Token):
    """
    Token for matching words built from sets of allowed characters.
    A C{Word} is defined by a string of all characters allowed in the first
    position, an optional string of characters allowed in the remaining positions
    (when omitted, the initial character set is used), and optional minimum,
    maximum, and/or exact lengths.  C{min} defaults to 1 (a minimum < 1 is
    rejected); C{max} and C{exact} default to 0, meaning no maximum or exact
    length restriction. The optional C{excludeChars} parameter lists characters
    to remove from the given character strings; useful for defining a word of all
    printables except for one or two characters, for instance.

    L{srange} is useful for defining custom character set strings for
    C{Word} expressions, using range notation from regular expression character sets.

    A common mistake is to use C{Word} to match a specific literal string, as in
    C{Word("Address")}. Remember that C{Word} treats its string argument as a
    I{set} of matchable characters. That expression would match "Add", "AAA",
    "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
    To match an exact literal string, use L{Literal} or L{Keyword}.

    pyparsing includes helper strings for building Words:
     - L{alphas}
     - L{nums}
     - L{alphanums}
     - L{hexnums}
     - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
     - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
     - L{printables} (any non-whitespace character)

    Example::
        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums+'-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, excludeChars=",")
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
        super(Word,self).__init__()
        if excludeChars:
            def without_excluded(chars):
                return ''.join(ch for ch in chars if ch not in excludeChars)
            initChars = without_excluded(initChars)
            if bodyChars:
                bodyChars = without_excluded(bodyChars)

        # body characters fall back to the initial characters when not given
        body_source = bodyChars if bodyChars else initChars
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        self.bodyCharsOrig = body_source
        self.bodyChars = set(body_source)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        # the name is derived from the character sets stored above (see __str__)
        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # With default length limits and no space among the characters, try to
        # precompile an equivalent regular expression for parseImpl to use.
        default_lengths = (min==1 and max==0 and exact==0)
        if default_lengths and ' ' not in self.initCharsOrig+self.bodyCharsOrig:
            if self.bodyCharsOrig == self.initCharsOrig:
                pattern = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            else:
                if len(self.initCharsOrig) == 1:
                    lead = re.escape(self.initCharsOrig)
                else:
                    lead = "[%s]" % _escapeRegexRangeChars(self.initCharsOrig)
                pattern = "%s[%s]*" % (lead, _escapeRegexRangeChars(self.bodyCharsOrig))
            if self.asKeyword:
                pattern = r"\b" + pattern + r"\b"
            self.reString = pattern
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # if the pattern cannot be compiled, parseImpl scans character by character
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        # regex path, when a compiled pattern is available
        if self.re:
            found = self.re.match(instring,loc)
            if found:
                return found.end(), found.group()
            raise ParseException(instring, loc, self.errmsg, self)

        # manual scan: check the first character, then consume body characters
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = min( start + self.maxLen, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        too_short = loc - start < self.minLen
        # with an explicit max, a word that keeps going past the limit is rejected
        overruns_max = self.maxSpecified and loc < instrlen and instring[loc] in bodychars
        # in keyword mode, the word may not touch body characters on either side
        breaks_keyword = self.asKeyword and (
            (start>0 and instring[start-1] in bodychars) or
            (loc<instrlen and instring[loc] in bodychars) )

        if too_short or overruns_max or breaks_keyword:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        try:
            return super(Word,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first four characters
                return s[:4]+"..." if len(s)>4 else s

            if self.initCharsOrig == self.bodyCharsOrig:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
            else:
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )

        return self.strRepr
2736
2737
class Regex(Token):
    r"""
    Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    A previously compiled regular expression object is also accepted.
    If the given regex contains named groups (defined using C{(?P<name>...)}), these will be preserved as
    named parse results.

    Example::
        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags."""
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                # re.error is the exception class raised by re.compile for a bad pattern
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # Record the pattern text itself; str() of a compiled pattern gives
            # the object's repr rather than the regular expression.
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Apply the compiled pattern at C{loc}. Returns C{(end location, results)},
        where the results hold the full matched text plus one named result per
        named group of the pattern; raises C{ParseException} when there is no match.
        """
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        for group_name, group_text in result.groupdict().items():
            ret[group_name] = group_text
        return loc,ret

    def __str__( self ):
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
2809
2810
class QuotedString(Token):
    r"""
    Token for matching strings enclosed in quoting characters.

    Accepts the following parameters:
        - quoteChar - string of one or more characters that opens the quoted string
        - escChar - character used to escape quotes, typically backslash (default=C{None})
        - escQuote - special quote sequence standing for an embedded quote (such as SQL's "" to escape an embedded ") (default=C{None})
        - multiline - boolean indicating whether the quoted text may span multiple lines (default=C{False})
        - unquoteResults - boolean indicating whether the quotes are removed from the returned text (default=C{True})
        - endQuoteChar - string of one or more characters that closes the quoted string (default=C{None} => same as quoteChar)
        - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    Example::
        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
    prints::
        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        super(QuotedString,self).__init__()

        # surrounding whitespace in the quote strings is discarded
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is not None:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()
        else:
            endQuoteChar = quoteChar

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # The pattern is: opening quote, then any number of "ordinary" pieces, then
        # the closing quote.  The first kind of piece is a single character that is
        # not the start of the end quote, not the escape character, and (unless
        # multiline) not a line break.
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            line_break_excl = ''
        else:
            self.flags = 0
            line_break_excl = r'\n\r'
        esc_excl = _escapeRegexRangeChars(escChar) if escChar is not None else ''
        self.pattern = r'%s(?:[^%s%s%s]' % (
            re.escape(self.quoteChar),
            _escapeRegexRangeChars(self.endQuoteChar[0]),
            line_break_excl,
            esc_excl )

        # for a multi-character end quote, also accept proper prefixes of it
        # that are followed by a character which breaks the end quote
        if len(self.endQuoteChar) > 1:
            partial_ends = [
                "%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                             _escapeRegexRangeChars(self.endQuoteChar[i]))
                for i in range(len(self.endQuoteChar)-1,0,-1) ]
            self.pattern += '|(?:' + ')|(?:'.join(partial_ends) + ')'
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # cheap first-character test before running the regular expression
        found = None
        if instring[loc] == self.firstQuoteChar:
            found = self.re.match(instring,loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = found.end()
        text = found.group()

        if not self.unquoteResults:
            return loc, text

        # drop the opening and closing quote strings
        text = text[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(text,basestring):
            # turn escaped whitespace sequences into the whitespace characters
            if '\\' in text and self.convertWhitespaceEscapes:
                ws_map = {
                    r'\t' : '\t',
                    r'\n' : '\n',
                    r'\f' : '\f',
                    r'\r' : '\r',
                }
                for escaped,actual in ws_map.items():
                    text = text.replace(escaped, actual)

            # remove the escape character from escaped characters
            if self.escChar:
                text = re.sub(self.escCharReplacePattern, r"\g<1>", text)

            # collapse escaped quote sequences to the end quote string
            if self.escQuote:
                text = text.replace(self.escQuote, self.endQuoteChar)

        return loc, text

    def __str__( self ):
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            description = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
            self.strRepr = description

        return self.strRepr
2946
2947
class CharsNotIn(Token):
    """
    Token matching a run of characters that are I{not} members of a given set.
    Whitespace is matched like any other character unless it is listed in the
    exclusion set (see example).  Construct with a string of all disallowed
    characters plus an optional minimum, maximum and/or exact length.  C{min}
    defaults to 1 (values < 1 are rejected); C{max} and C{exact} default to 0,
    meaning no maximum or exact length restriction.

    Example::
        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
    prints::
        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        # whitespace is significant for this token, so never skip it
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        # a non-positive max means "unbounded"
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Consume characters from C{loc} until an excluded character, the
        maximum length or the end of C{instring} is reached.  Raises
        C{ParseException} if the first character is excluded or fewer than
        C{minLen} characters were consumed.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        limit = min( start+self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        """
        Return the inherited string form, or, if that raises, a cached
        C{"!W:(...)"} description showing at most the first four excluded
        characters.
        """
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            # inherited __str__ failed - build our own description below
            pass

        if self.strRepr is None:
            if len(self.notChars) <= 4:
                self.strRepr = "!W:(%s)" % self.notChars
            else:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]

        return self.strRepr
3018
class White(Token):
    """
    Token that explicitly matches whitespace.  Pyparsing grammars normally skip
    whitespace; use this class when a whitespace structure is significant.
    Construct with a string of the whitespace characters to match (default is
    C{" \\t\\r\\n"}).  The optional C{min}, C{max} and C{exact} arguments are
    as defined for the C{L{Word}} class.
    """
    # display names used to build the expression name from the matched characters
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # characters we are asked to match must not be skipped as leading whitespace
        skippable = "".join(c for c in self.whiteChars if c not in self.matchWhite)
        self.setWhitespaceChars( skippable )
        #~ self.leaveWhitespace()
        self.name = ("".join(White.whiteStrs[c] for c in self.matchWhite))
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        # a non-positive max means "unbounded"
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.minLen = self.maxLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Consume a run of C{matchWhite} characters starting at C{loc}, bounded
        by C{maxLen} and the end of C{instring}.  Raises C{ParseException} if
        the first character is not in C{matchWhite} or the run is shorter than
        C{minLen}.
        """
        wschars = self.matchWhite
        if instring[loc] not in wschars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        limit = min( start + self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] in wschars:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
3068
3069
class _PositionToken(Token):
    """
    Common base for the position-testing tokens defined below.  Instances are
    named after their concrete class, may return an empty match and are marked
    as not raising C{IndexError}.
    """
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        # use the concrete subclass name as the expression name
        self.name = self.__class__.__name__
3076
class GoToColumn(_PositionToken):
    """
    Token to advance to a specific column of input text; useful for tabular report scraping.
    """
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """
        Advance C{loc} over ignorable expressions and whitespace, stopping as
        soon as the target column, a non-space character or the end of
        C{instring} is reached.
        """
        if col(loc,instring) == self.col:
            # already at the requested column - nothing to skip
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        end = len(instring)
        while loc < end and instring[loc].isspace() and col( loc, instring ) != self.col:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return the text between C{loc} and the target column and move to that
        column.  Raises C{ParseException} if C{loc} is already past it.
        """
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]
3101
3102
class LineStart(_PositionToken):
    """
    Matches if current position is at the beginning of a line within the parse string

    Example::

        test = '''\\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    Prints::
        ['AAA', ' this line']
        ['AAA', ' and this line']

    """
    def __init__( self ):
        super(LineStart,self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Succeed with an empty token list when C{loc} is in column 1; raise
        C{ParseException} otherwise.  The location is not advanced.
        """
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3132
class LineEnd(_PositionToken):
    """
    Matches if current position is at the end of a line within the parse string
    """
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newline must not be skipped as whitespace, or it could never be matched
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match a newline at C{loc} (returned as the token C{"\\n"}) or the end
        of C{instring} (returned as an empty token list); in both cases the
        location advances by one.  Anything else raises C{ParseException}.
        """
        end = len(instring)
        if loc == end:
            return loc+1, []
        if loc < end and instring[loc] == "\n":
            return loc+1, "\n"
        # either a non-newline character, or loc is already beyond the end
        raise ParseException(instring, loc, self.errmsg, self)
3152
class StringStart(_PositionToken):
    """
    Matches if current position is at the beginning of the parse string
    """
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Succeed with an empty token list when C{loc} is 0, or when C{loc} is
        exactly where C{preParse} lands when started from position 0.  Raises
        C{ParseException} otherwise.
        """
        # a non-zero loc is still acceptable if everything before it is
        # skipped by preParse (whitespace and ignorables)
        if loc != 0 and loc != self.preParse( instring, 0 ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3167
class StringEnd(_PositionToken):
    """
    Matches if current position is at the end of the parse string
    """
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Succeed with an empty token list when C{loc} is at or beyond the end
        of C{instring}; raise C{ParseException} when input remains.

        When C{loc} is exactly at the end, the returned location is one past
        it; when C{loc} is already beyond the end it is returned unchanged.
        """
        instrlen = len(instring)
        if loc < instrlen:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == instrlen:
            return loc+1, []
        # only loc > instrlen remains; the three comparisons are exhaustive,
        # so no further fallback branch is needed
        return loc, []
3185
class WordStart(_PositionToken):
    """
    Matches if the current position is at the beginning of a Word, and
    is not preceded by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
    the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        # a set gives constant-time membership tests in parseImpl
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """
        Succeed with an empty token list at position 0, or when the previous
        character is not a word character and the current one is.  Raises
        C{ParseException} otherwise.  The location is not advanced.
        """
        if loc == 0:
            # start of the string always counts as a word start
            return loc, []

        chars = self.wordChars
        atWordStart = instring[loc-1] not in chars and instring[loc] in chars
        if not atWordStart:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3205
class WordEnd(_PositionToken):
    """
    Matches if the current position is at the end of a Word, and
    is not followed by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
    the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        # a set gives constant-time membership tests in parseImpl
        self.wordChars = set(wordChars)
        # whitespace following a word is what marks its end, so do not skip it
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """
        Succeed with an empty token list when C{loc} is at or past the end of
        C{instring}, or when the current character is not a word character and
        the previous one is.  Raises C{ParseException} otherwise.  The
        location is not advanced.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            chars = self.wordChars
            # NOTE(review): when loc == 0, instring[loc-1] is instring[-1],
            # i.e. the last character of the string, not a preceding one.
            atWordEnd = instring[loc] not in chars and instring[loc-1] in chars
            if not atWordEnd:
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3227
3228
class ParseExpression(ParserElement):
    """
    Abstract subclass of ParserElement, for combining and post-processing parsed tokens.

    Holds the list of contained expressions in C{self.exprs} and forwards
    whitespace, ignore, streamline, validate and copy operations to them.
    """
    # The ABC aliases in the top-level ``collections`` module (such as
    # ``collections.Iterable``) were removed in Python 3.10; resolve the ABC
    # once here, falling back to the old location for Python 2.
    try:
        from collections.abc import Iterable as _Iterable
    except ImportError:
        from collections import Iterable as _Iterable

    def __init__( self, exprs, savelist = False ):
        """
        Store C{exprs} as a list of expressions.

        C{exprs} may be a generator, a single string, any iterable of
        expressions or strings, or a single non-iterable expression.  A
        single string, or an iterable made up entirely of strings, is
        converted using C{ParserElement._literalStringClass}.
        """
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ ParserElement._literalStringClass( exprs ) ]
        elif isinstance( exprs, ParseExpression._Iterable ):
            exprs = list(exprs)
            # if sequence of strings provided, wrap with Literal
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = map(ParserElement._literalStringClass, exprs)
            self.exprs = list(exprs)
        else:
            # not a registered Iterable: try list() anyway, and treat a
            # non-listable object as a single expression
            try:
                self.exprs = list( exprs )
            except TypeError:
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the contained expression(s) at index or slice C{i}."""
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions and return C{self}."""
        self.exprs.append( other )
        # cached string representation is now stale
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the originally supplied expressions are left untouched
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as an ignorable expression on this expression and
        on every contained expression.  A C{Suppress} instance that is
        already in C{ignoreExprs} is not added again.
        """
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseExpression, self).ignore( other )
                for e in self.exprs:
                    e.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseExpression, self).ignore( other )
            for e in self.exprs:
                e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """
        Return the inherited string form, or, if that raises, a cached
        C{"ClassName:(exprs)"} description.
        """
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            # inherited __str__ failed - build our own description below
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """
        Streamline this expression and all contained expressions, flatten
        nested expressions of the same class where safe, and refresh
        C{errmsg}.  Returns C{self}.
        """
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the inherited C{setResultsName} and return its result."""
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=[] ):
        """
        Validate every contained expression, then run the recursion check on
        this expression.  C{validateTrace} is copied, never mutated, so the
        shared default list is safe.
        """
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy of this expression whose contained expressions are also copies."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
3340
class And(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'+'} operator.
    May also be constructed using the C{'-'} operator, which will suppress backtracking.

    Example::
        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"),name_expr("name"),integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """
        Marker element named C{'-'}.  C{And.parseImpl} does not parse it;
        once it has been seen, failures of later elements are raised as
        C{ParseSyntaxException}.
        """
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
        # the sequence can only be empty if every member can be empty
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling follows the first contained expression
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Parse each contained expression in order, accumulating their tokens.
        After an C{_ErrorStop} marker, a C{ParseBaseException} or
        C{IndexError} from a later element is re-raised as
        C{ParseSyntaxException}.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                # marker only - switches on the no-backtracking behaviour
                errorStop = True
                continue

            if not errorStop:
                loc, exprtokens = e._parse( instring, loc, doActions )
            else:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)

            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Append C{other} in place; a string is first converted with C{_literalStringClass}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """
        Run the recursion check on the leading contained expressions, up to
        and including the first one that cannot return empty.
        """
        trace = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( trace )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the assigned name if any, else a cached C{"{a b c}"} style description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = " ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + parts + "}"

        return self.strRepr
3415
3416
class Or(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the C{'^'} operator.

    Example::
        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))
    prints::
        [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        super(Or,self).__init__(exprs, savelist)
        # with no alternatives at all, the expression is treated as possibly empty
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Try every alternative without running parse actions, then re-parse
        the successful ones from longest to shortest match and return the
        first that succeeds.  If none succeeds, raise the exception that got
        furthest into the input, with its message replaced by C{self.errmsg}.
        """
        furthestLoc = -1
        furthestExc = None
        candidates = []
        for e in self.exprs:
            try:
                endloc = e.tryParse( instring, loc )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestExc = ParseException(instring,len(instring),e.errmsg,self)
                    furthestLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endloc, e))

        if candidates:
            # stable sort: equally long matches keep their declaration order
            candidates.sort(key=lambda item: item[0], reverse=True)
            for _,e in candidates:
                try:
                    return e._parse( instring, loc, doActions )
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthestLoc:
                        furthestExc = err
                        furthestLoc = err.loc

        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestExc.msg = self.errmsg
        raise furthestExc


    def __ixor__(self, other ):
        """Append C{other} in place; a string is first converted with C{_literalStringClass}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        """Return the assigned name if any, else a cached C{"{a ^ b}"} style description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = " ^ ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + parts + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        trace = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( trace )
3494
3495
class MatchFirst(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the C{'|'} operator.

    Example::
        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        super(MatchFirst,self).__init__(exprs, savelist)
        # with no alternatives at all, the expression is treated as possibly empty
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Return the result of the first alternative that parses at C{loc}.
        If none does, raise the exception that got furthest into the input,
        with its message replaced by C{self.errmsg}.
        """
        furthestLoc = -1
        furthestExc = None
        for e in self.exprs:
            try:
                return e._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestExc = ParseException(instring,len(instring),e.errmsg,self)
                    furthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestExc.msg = self.errmsg
        raise furthestExc

    def __ior__(self, other ):
        """Append C{other} in place; a string is first converted with C{_literalStringClass}."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return the assigned name if any, else a cached C{"{a | b}"} style description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = " | ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + parts + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        trace = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( trace )
3562
3563
class Each(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found, but in any order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'&'} operator.

    Example::
        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )
    prints::
        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # expression groups are built lazily on the first parseImpl call
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Find an order in which the contained expressions match, using
        look-ahead only, then parse them for real in that order and return
        the summed results.  Raises C{ParseException} if any required
        expression was never matched.
        """
        if self.initExprGroups:
            # classify the contained expressions once and cache the groups
            wrappedOptionals = [ e for e in self.exprs if isinstance(e,Optional) ]
            self.opt1map = dict((id(e.expr),e) for e in wrappedOptionals)
            unwrapped = [ e.expr for e in wrappedOptionals ]
            emptyAllowed = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
            self.optionals = unwrapped + emptyAllowed
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False

        scanLoc = loc
        pendingReqd = self.required[:]
        pendingOpt = self.optionals[:]
        matchOrder = []

        # keep sweeping over the candidates until a full sweep matches nothing
        while True:
            candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            failures = 0
            for e in candidates:
                try:
                    scanLoc = e.tryParse( instring, scanLoc )
                except ParseException:
                    failures += 1
                else:
                    # record the Optional wrapper, when there is one, rather than its inner expr
                    matchOrder.append(self.opt1map.get(id(e),e))
                    if e in pendingReqd:
                        pendingReqd.remove(e)
                    elif e in pendingOpt:
                        pendingOpt.remove(e)
            if failures == len(candidates):
                break

        if pendingReqd:
            missing = ", ".join(_ustr(e) for e in pendingReqd)
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOpt]

        # second pass: parse for real, in the order discovered above
        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        return loc, sum(resultlist, ParseResults([]))

    def __str__( self ):
        """Return the assigned name if any, else a cached C{"{a & b}"} style description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = " & ".join(_ustr(e) for e in self.exprs)
            self.strRepr = "{" + parts + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every contained expression."""
        trace = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( trace )
3686
3687
class ParseElementEnhance(ParserElement):
    """
    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

    Wraps a single contained expression, stored in C{self.expr}, which may
    be C{None}; every method that touches it checks for C{None} first.
    """
    def __init__( self, expr, savelist=False ):
        """
        Store C{expr} (a string is first converted to a parser element) and,
        when it is not C{None}, copy its whitespace, index-error, empty-match,
        list-saving, pre-parse and ignore settings onto this element.
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            if issubclass(ParserElement._literalStringClass, Token):
                expr = ParserElement._literalStringClass(expr)
            else:
                expr = ParserElement._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Delegate to the contained expression without a second pre-parse.
        Raises C{ParseException} when no expression is set.
        """
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """
        Disable whitespace skipping on this element and on a private copy of
        the contained expression.  Returns C{self}.
        """
        self.skipWhitespace = False
        # guard before copying: expr may be None, and calling copy() on None
        # would raise AttributeError
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """
        Register C{other} as an ignorable expression on this element and on
        the contained expression.  A C{Suppress} instance that is already in
        C{ignoreExprs} is not added again.
        """
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseElementEnhance, self).ignore( other )
                if self.expr is not None:
                    self.expr.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the contained expression.  Returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """
        Raise C{RecursiveGrammarException} if this element already appears in
        C{parseElementList}; otherwise continue the check in the contained
        expression.
        """
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """
        Validate the contained expression, then run the recursion check on
        this element.  C{validateTrace} is copied, never mutated, so the
        shared default list is safe.
        """
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """
        Return the inherited string form, or, if that raises, a cached
        C{"ClassName:(expr)"} description (built only when an expression is
        set).
        """
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # inherited __str__ failed - build our own description below
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
3763
3764
class FollowedBy(ParseElementEnhance):
    """
    Positive lookahead for the given parse expression.  C{FollowedBy} checks
    that the expression matches at the current position but does I{not}
    advance the parsing position within the input string.  C{FollowedBy}
    always returns a null token list.

    Example::
        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
    prints::
        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        # a lookahead never consumes input
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Try the wrapped expression at C{loc}; any exception raised by
        C{tryParse} propagates to the caller.  On success the location is
        returned unchanged together with an empty token list.
        """
        self.expr.tryParse( instring, loc )
        no_tokens = []
        return loc, no_tokens
3789
3790
class NotAny(ParseElementEnhance):
    """
    Negative lookahead for the given parse expression.  C{NotAny} checks that
    the expression does I{not} match at the current position; it does I{not}
    advance the parsing position within the input string.  Also, C{NotAny}
    does I{not} skip over leading whitespace.  C{NotAny} always returns a null
    token list.  May be constructed using the '~' operator.

    Example::
        # match a word, unless the text at this position starts with 'end'
        not_end = ~Literal("end") + Word(alphas)
        not_end.parseString("begin")  # -> ['begin']
        not_end.parseString("end")    # -> raises ParseException
    """
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        # do NOT use self.leaveWhitespace() here: the setting must not be
        # propagated to the wrapped expression
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Raise L{ParseException} if the wrapped expression can parse at C{loc};
        otherwise return C{loc} unchanged with an empty token list.
        """
        unwanted_found = self.expr.canParseNext(instring, loc)
        if unwanted_found:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def __str__( self ):
        """Return the assigned name if there is one, else the cached C{"~{expr}"} form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "~{" + _ustr(self.expr) + "}"
        return self.strRepr
3822
class _MultipleMatch(ParseElementEnhance):
    """
    Common base for the repetition expressions L{OneOrMore} and L{ZeroOrMore}.

    Parameters:
     - expr - expression to be matched repeatedly
     - stopOn - (default=C{None}) - optional sentinel, given as an expression
          or as a string (a string is converted with
          C{ParserElement._literalStringClass}); its negation (C{~stopOn}) is
          tried before every repetition
    """
    def __init__( self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        ender = stopOn
        if isinstance(ender, basestring):
            ender = ParserElement._literalStringClass(ender)
        # keep the negated sentinel, or None when no stopOn was given
        self.not_ender = ~ender if ender is not None else None

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match the wrapped expression once, then as many more times as possible.

        The first match (and the sentinel check before it) is made outside the
        C{try} block, so a failure there propagates to the caller.  Failures in
        later repetitions end the loop and the tokens collected so far are
        returned together with the location reached.
        """
        # bind the methods used in the loop to local names
        self_expr_parse = self.expr._parse
        self_skip_ignorables = self._skipIgnorables
        check_ender = self.not_ender is not None
        if check_ender:
            try_not_ender = self.not_ender.tryParse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if check_ender:
            try_not_ender(instring, loc)
        loc, tokens = self_expr_parse( instring, loc, doActions, callPreParse=False )
        try:
            hasIgnoreExprs = (not not self.ignoreExprs)
            # the loop is left only through the exceptions caught below
            while 1:
                if check_ender:
                    try_not_ender(instring, loc)
                if hasIgnoreExprs:
                    preloc = self_skip_ignorables( instring, loc )
                else:
                    preloc = loc
                loc, tmptokens = self_expr_parse( instring, preloc, doActions )
                # accumulate only results that carry tokens or named results
                if tmptokens or tmptokens.haskeys():
                    tokens += tmptokens
        except (ParseException,IndexError):
            # no further repetition matched; loc still holds the position
            # after the last successful match
            pass

        return loc, tokens
3860
class OneOrMore(_MultipleMatch):
    """
    Repetition of the given expression, one or more times.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__( self ):
        """Return the assigned name if there is one, else the cached C{"{expr}..."} form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "{" + _ustr(self.expr) + "}..."
        return self.strRepr
3895
class ZeroOrMore(_MultipleMatch):
    """
    Optional repetition of the given expression, zero or more times.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to L{OneOrMore}
    """
    def __init__( self, expr, stopOn=None):
        super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
        # zero repetitions is a valid match
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Run the inherited repetition parse; if it raises L{ParseException} or
        C{IndexError}, return the original C{loc} with an empty token list.
        """
        try:
            outcome = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException,IndexError):
            outcome = loc, []
        return outcome

    def __str__( self ):
        """Return the assigned name if there is one, else the cached C{"[expr]..."} form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "[" + _ustr(self.expr) + "]..."
        return self.strRepr
3926
class _NullToken(object):
    """Placeholder object that is always false and converts to an empty string."""

    def __bool__(self):
        return False

    # Python 2 spelling of the truth-value hook
    __nonzero__ = __bool__

    def __str__(self):
        return ""


# module-level sentinel instance; compared by identity where it is used
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
     - expr - expression that may match at most once
     - default (optional) - value to be returned if the optional expression is not found.

    Example::
        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')
    prints::
        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    def __init__( self, expr, default=_optionalNotMatched ):
        super(Optional,self).__init__( expr, savelist=False )
        self.saveAsList = self.expr.saveAsList
        # an absent match is still a successful parse
        self.mayReturnEmpty = True
        self.defaultValue = default

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Try the wrapped expression at C{loc}.

        If it raises L{ParseException} or C{IndexError}, C{loc} is returned
        unchanged together with either an empty list (no default given) or the
        default value; when the wrapped expression has a results name, the
        default is also stored under that name.
        """
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            if self.defaultValue is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([ self.defaultValue ])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [ self.defaultValue ]
        return loc, tokens

    def __str__( self ):
        """Return the assigned name if there is one, else the cached C{"[expr]"} form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "[" + _ustr(self.expr) + "]"
        return self.strRepr
3998
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a string failOn is converted to an expression; anything else
        # (including None) is stored as given
        if isinstance(failOn, basestring):
            self.failOn = ParserElement._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Scan forward from C{loc}, one character at a time, until the target
        expression parses.

        Returns the location reached and a L{ParseResults} holding the skipped
        text; when C{include} was set, the target expression is parsed as well
        and its tokens are appended.  Raises L{ParseException} if the scan
        runs past the end of C{instring} without finding the target.
        """
        startloc = loc
        instrlen = len(instring)
        expr = self.expr
        # bind the methods used in the scan loop to local names
        expr_parse = self.expr._parse
        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        # '<=' so that the target is also tried at the very end of the input
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): this break skips the loop's else clause, so
                # execution continues at "build up return values" with tmploc
                # at the failOn position instead of raising here; the class
                # docstring says a failOn match means "not a match" - confirm
                # which behavior is intended.
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while 1:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                # probe only: parse actions are disabled for this attempt
                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target again, this time honoring doActions
            loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult
4113
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """
        Assign the expression this C{Forward} stands for.

        A string is converted with C{ParserElement._literalStringClass}.  The
        parsing attributes of the assigned expression (index-error and
        empty-match flags, whitespace settings, C{saveAsList}) are copied onto
        this element and its ignore expressions are appended to this
        element's list.  Returns C{self}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        # drop any cached string representation
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """In-place form of '<<'; returns C{self} so the name stays bound to this object."""
        return self << other

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only; the contained expression is not touched."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """
        Streamline the contained expression once.

        The C{streamlined} flag is set before descending, so a grammar that
        refers back to this element does not recurse endlessly.
        """
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """
        Validate the contained expression unless this element is already in
        C{validateTrace}, then check this element for recursion.  The trace
        list is only copied, never modified.
        """
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name if there is one, else C{"<ClassName>: ..."}."""
        if hasattr(self,"name"):
            return self.name
        # The contained expression is deliberately not rendered: an earlier
        # implementation that expanded it was stubbed out because it created
        # awful memory and performance issues.
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """
        Return a copy of this element.

        With an expression assigned, the inherited C{copy} is used.  Without
        one, a new C{Forward} is returned whose expression is this (still
        undefined) C{Forward}.
        """
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret
4194
class _ForwardNoRecurse(Forward):
    """Variant of L{Forward} whose string form is always C{"..."}."""

    def __str__( self ):
        return "..."
4198
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
    """
    def __init__( self, expr, savelist=False ):
        # savelist is accepted but not passed on to the base class
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
4206
class Combine(TokenConverter):
    """
    Converter that concatenates all matching tokens into a single string.
    By default, the matching patterns must also be contiguous in the input string;
    this can be disabled by specifying C{'adjacent=False'} in the constructor.

    Example::
        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        # suppress whitespace-stripping in contained parse expressions ...
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        # ... but re-enable it on the Combine itself
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore( self, other ):
        """
        Register an ignorable expression.  In adjacent mode only
        C{ParserElement.ignore} is applied to this element; otherwise the
        inherited C{ignore} is used.  Returns C{self}.
        """
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        """
        Replace the matched tokens by one string built from
        C{tokenlist._asStringList(self.joinString)}.

        The result starts as a copy of C{tokenlist} emptied of its tokens.
        It is returned wrapped in a list when this element has a results name
        and the result still has keys.
        """
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [ combined ]
        return combined
4250
class Group(TokenConverter):
    """
    Converter that returns the matched tokens as a nested list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.

    Example::
        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Wrap the matched tokens in a one-element list."""
        grouped = [ tokenlist ]
        return grouped
4271
class Dict(TokenConverter):
    """
    Converter that returns a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as an item key.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())
    prints::
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
    See more examples at L{ParseResults} of accessing fields by results name.
    """
    def __init__( self, expr ):
        super(Dict,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """
        Add a named entry to C{tokenlist} for every non-empty token in it.

        The first item of each token is the key (an C{int} key is converted
        to a stripped string).  The stored value is an empty string for a
        one-item token, the second item for a two-item token whose second
        item is not a L{ParseResults}, and otherwise the remaining items
        (unwrapped when exactly one keyless item remains).

        Returns C{tokenlist}, wrapped in a list when this element has a
        results name.
        """
        for offset, entry in enumerate(tokenlist):
            entry_len = len(entry)
            if entry_len == 0:
                continue

            key = entry[0]
            if isinstance(key,int):
                key = _ustr(entry[0]).strip()

            if entry_len == 1:
                value = ""
            elif entry_len == 2 and not isinstance(entry[1],ParseResults):
                value = entry[1]
            else:
                rest = entry.copy()
                del rest[0]
                if len(rest) != 1 or (isinstance(rest,ParseResults) and rest.haskeys()):
                    value = rest
                else:
                    value = rest[0]
            tokenlist[key] = _ParseResultsWithOffset(value,offset)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
4335
4336
class Suppress(TokenConverter):
    """
    Converter that discards the results of a parsed expression.

    Example::
        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))
    prints::
        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
    (See also L{delimitedList}.)
    """
    def postParse( self, instring, loc, tokenlist ):
        """Drop the matched tokens by returning an empty list."""
        nothing = []
        return nothing

    def suppress( self ):
        """Already suppressing, so return this element itself."""
        return self
4361
4362
class OnlyOnce(object):
    """
    Wrapper for parse actions, to ensure they are only called once.

    After one call that returns normally, every further call raises
    L{ParseException} until C{reset()} is invoked.
    """
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self,s,l,t):
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        # marked only after the wrapped call has returned
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped parse action to be called again."""
        self.called = False
4378
def traceParseAction(f):
    """
    Decorator for debugging parse actions.

    When the parse action is called, this decorator writes C{">>entering I{method-name}(line: I{current_source_line}, I{parse_location}, I{matched_tokens})"} to C{sys.stderr}.
    When the parse action completes, the decorator writes C{"<<leaving"} followed by the returned value, or any exception that the parse action raised.

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three arguments are always (string, location, tokens)
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # an extra leading argument is the owning instance; prefix its class name
            label = paArgs[0].__class__.__name__ + '.' + label
        sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (label,line(l,s),l,t) )
        try:
            outcome = f(*paArgs)
        except Exception as exc:
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (label,exc) )
            raise
        else:
            sys.stderr.write( "<<leaving %s (ret: %r)\n" % (label,outcome) )
            return outcome

    # present the wrapper under the wrapped function's name when possible
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
4419
4420#
4421# global helpers
4422#
def delimitedList( expr, delim=",", combine=False ):
    """
    Helper to define a delimited list of expressions - the delimiter defaults to ','.
    By default, the list elements and delimiters can have intervening whitespace, and
    comments, but this can be overridden by passing C{combine=True} in the constructor.
    If C{combine} is set to C{True}, the matching tokens are returned as a single token
    string, with the delimiters included; otherwise, the matching tokens are returned
    as a list of tokens, with the delimiters suppressed.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
    if combine:
        # delimiters stay in the result and everything is joined into one token
        grammar = Combine( expr + ZeroOrMore( delim + expr ) )
    else:
        # delimiters are matched but kept out of the result
        grammar = expr + ZeroOrMore( Suppress( delim ) + expr )
    return grammar.setName(dlName)
4441
def countedArray( expr, intExpr=None ):
    """
    Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens are returned as a list of the expr tokens - the leading count token is suppressed.

    If C{intExpr} is specified, it should be a pyparsing expression that produces an integer value.

    Example::
        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    array_expr = Forward()

    def countFieldParseAction(s,l,t):
        # redefine the array expression from the count that was just parsed
        n = t[0]
        if n:
            array_expr << Group(And([expr]*n))
        else:
            array_expr << Group(empty)
        # the count itself is dropped from the results
        return []

    if intExpr is not None:
        # work on a copy so the caller's expression is not modified
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return ( intExpr + array_expr ).setName('(len) ' + _ustr(expr) + '...')
4472
def _flatten(L):
    """Return a flat list of the items of C{L}, expanding nested C{list} items recursively."""
    flat = []
    for item in L:
        if not isinstance(item,list):
            flat.append(item)
            continue
        flat += _flatten(item)
    return flat
4481
def matchPreviousLiteral(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s,l,t):
        # redefine the repeater from whatever expr has just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten t tokens
            flat_tokens = _flatten(t.asList())
            rep << And(Literal(tt) for tt in flat_tokens)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
4509
def matchPreviousExpr(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
    expressions, will I{not} match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then compared, so
    C{"1"} is compared with C{"10"}.
    Do I{not} use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with its own copy of expr
    rep <<= expr.copy()

    def copyTokenToRepeater(s,l,t):
        expected = _flatten(t.asList())

        def mustMatchTheseTokens(s,l,t):
            # reject the repeat unless it produced the same flattened tokens
            if _flatten(t.asList()) != expected:
                raise ParseException("",0,"")

        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
4537
def _escapeRegexRangeChars(s):
    """
    Internal helper that escapes C{s} for use inside a regex C{[...]} set:
    backslash, C{^}, C{-} and C{]} get a leading backslash, and newline and
    tab characters are replaced by their two-character escape sequences.
    """
    # the backslash must be handled first, so that the backslashes added for
    # the other characters are not escaped again
    replacements = [(ch, _bslash + ch) for ch in r"\^-]"]
    replacements += [("\n", r"\n"), ("\t", r"\t")]
    for old, new in replacements:
        s = s.replace(old, new)
    return _ustr(s)
4545
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    If C{strs} yields no symbols, a C{L{NoMatch}} expression is returned; if C{strs}
    is neither a string nor an iterable, a C{SyntaxWarning} is issued first.

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # The ABC aliases in the top-level collections module were removed in
    # Python 3.10; import from collections.abc, falling back for Python 2.
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Drop duplicate symbols, and move any symbol in front of an earlier symbol
    # that is a prefix of it (the shorter one would otherwise always match first
    # and mask the longer one).  Position i is re-examined after every change.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                break
        else:
            # no later symbol conflicts with symbols[i]
            i += 1

    if not caseless and useRegex:
        #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: a character set suffices
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)


    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
4618
def dictOf( key, value ):
    """
    Helper to build a dictionary-style expression from a pattern for the keys
    and a pattern for the values.  The result is
    C{Dict(ZeroOrMore(Group(key + value)))}, so the C{L{Dict}}, C{L{ZeroOrMore}}
    and C{L{Group}} wrappers are applied in the proper order for you.  The key
    pattern can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text.  The value pattern
    can include named results, so that the C{Dict} results can include named
    token fields.

    Example (C{label} and C{data_word} are expressions defined by the caller)::
        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())
    prints::
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    entry = Group( key + value )
    return Dict( ZeroOrMore( entry ) )
4653
def originalTextFor(expr, asString=True):
    """
    Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # zero-width markers that record the parse location before and after expr
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    if asString:
        def extractText(s,l,t):
            # replace all tokens with the slice of the input that was matched
            return s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # remove the two marker entries, then swap the token list in place
            # so that results names set by expr are retained
            start = t.pop('_original_start')
            end = t.pop('_original_end')
            t[:] = [s[start:end]]

    wrapped.setParseAction(extractText)
    wrapped.ignoreExprs = expr.ignoreExprs
    return wrapped
4690
def ungroup(expr):
    """
    Helper to undo one level of grouping: wraps C{expr} in a
    C{TokenConverter} whose parse action returns only the first token
    (C{t[0]}) of the matched results.
    """
    converter = TokenConverter(expr)
    converter.setParseAction(lambda toks: toks[0])
    return converter
4697
def locatedExpr(expr):
    """
    Helper to decorate a returned token with its starting and ending locations in the input string.
    The match is returned as a single group with the following results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}

    Example::
        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)
    prints::
        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width expression whose only token is the current parse location
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    # the end marker is a separate copy with leaveWhitespace() applied
    endMarker = startMarker.copy().leaveWhitespace()
    return Group(startMarker("locn_start") + expr("value") + endMarker("locn_end"))
4720
4721
# convenience constants for positional expressions
# (one module-level, pre-named instance of each of the corresponding classes)
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")
4728
# Expressions used by srange() to parse a regex-style '[...]' character set.

# a backslash plus one of the listed punctuation characters; the parse action
# keeps only the second character, i.e. the escaped character itself
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# '\x##' (or '\0x##') hex escape.  The hex digits are everything after the
# 'x'/'X' marker.  (Left-stripping the character set '\', '0', 'x' instead
# would also remove leading zero digits - leaving nothing to convert for
# '\x00' - and would not remove an upper-case 'X'.)
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().split('x',1)[1],16)))
# '\0###' octal escape; everything after the backslash is read as base 8
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
# one character of the set: an escape, or any single character other than '\' and ']'
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
# 'a-z' style range, grouped so that srange() can tell it from a single character
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
# the complete bracket expression: '[', optional '^' ("negate"), the set contents ("body"), ']'
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
4735
def srange(s):
    r"""
    Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
    If the string cannot be parsed or expanded, an empty string is returned.
    """
    def _expand(part):
        # a grouped (ParseResults) part is a range: its first and last characters
        if isinstance(part, ParseResults):
            first, last = ord(part[0]), ord(part[1])
            return ''.join(unichr(code) for code in range(first, last + 1))
        # anything else is a single character, used as-is
        return part

    try:
        body = _reBracketExpr.parseString(s).body
        return "".join(_expand(part) for part in body)
    except Exception:
        return ""
4759
def matchOnlyAtCol(n):
    """
    Helper that builds a parse action which only accepts a match located at
    column C{n} of the input text; at any other column the parse action
    raises a C{ParseException}.
    """
    def verifyCol(strg,locn,toks):
        if col(locn,strg) == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
4769
def replaceWith(replStr):
    """
    Helper that builds a parse action which ignores the matched tokens and
    returns a one-element list holding C{replStr}.  Especially useful when
    used with C{L{transformString<ParserElement.transformString>}()}.

    Example::
        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    constantAction = lambda _strg, _locn, _toks: [replStr]
    return constantAction
4783
def removeQuotes(s,l,t):
    """
    Helper parse action that strips the quotation marks from a parsed quoted
    string, by dropping the first and last characters of the first token.

    Example::
        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]
4797
def tokenMap(func, *args):
    """
    Helper to define a parse action that applies C{func} to every element of a
    ParseResults list and returns the list of results.  Any additional C{args}
    are forwarded to C{func} as extra arguments after the token, as in
    C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will
    convert the parsed data to an integer using base 16.

    The returned parse action is named after C{func} (its C{__name__}, or else
    the name of its class).

    Example (compare the last to example in L{ParserElement.transformString})::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s,l,t):
        mapped = []
        for tokn in t:
            mapped.append(func(tokn, *args))
        return mapped

    try:
        # the class name is only the fallback for callables lacking __name__
        class_name = func.__class__.__name__
        func_name = getattr(func, '__name__', class_name)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa
4841
# Ready-made parse actions built with tokenMap: each matched token is converted
# with _ustr() and then upper-/lower-cased.
upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case. Deprecated in favor of L{pyparsing_common.upcaseTokens}"""

downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case. Deprecated in favor of L{pyparsing_common.downcaseTokens}"""
4847
def _makeTags(tagStr, xml):
    """
    Internal helper to construct opening and closing tag expressions, given a tag name.

    C{tagStr} is either a tag name string (converted to a C{Keyword}, caseless
    unless C{xml} is true) or an expression with a C{name} attribute.  With
    C{xml} true, every attribute needs a double-quoted value; otherwise
    attribute names are lower-cased and values are optional and may be quoted
    or unquoted.  Returns the C{(openTag, closeTag)} pair, each carrying the
    tag name in its C{tag} attribute.
    """
    if not isinstance(tagStr,basestring):
        resname = tagStr.name
    else:
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)

    tagAttrName = Word(alphas,alphanums+"_-:")
    if xml:
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attribute = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        # an unquoted value extends up to whitespace or the closing '>'
        printablesLessRAbrack = printables.replace(">", "")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attribute = Group( tagAttrName.setParseAction(downcaseTokens) +
                           Optional( Suppress("=") + tagAttrValue ) )

    # "empty" results name: True when the tag ends with '/', else False
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(attribute)) + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" + the tag name in title case, with any
    # ':' separators removed
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
4876
def makeHTMLTags(tagStr):
    """
    Helper to construct the opening and closing tag expressions for an HTML tag,
    given its name.  Matches tags in either upper or lower case, attributes with
    namespaces and with quoted or unquoted values.

    Example::
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
        a,a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
            print(link.link_text, '->', link.href)
    prints::
        pyparsing -> http://pyparsing.wikispaces.com
    """
    return _makeTags(tagStr, xml=False)
4895
def makeXMLTags(tagStr):
    """
    Helper to construct the opening and closing tag expressions for an XML tag,
    given its name.  Matches tags only in the given upper/lower case.

    Example: similar to L{makeHTMLTags}
    """
    return _makeTags(tagStr, xml=True)
4904
def withAttribute(*args,**attrDict):
    """
    Helper to create a validating parse action to be used with start tags created
    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
    with a required attribute value, to avoid false matches on common tags such as
    C{<TD>} or C{<DIV>}.

    Call C{withAttribute} with a series of attribute names and values. Specify the list
    of filter attributes names and values as:
     - keyword arguments, as in C{(align="right")}, or
     - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
    For attribute names with a namespace prefix, you must use the second form.  Attribute
    names are matched insensitive to upper/lower case.

    If any positional name-value tuples are given, the keyword arguments are not used.

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    pairs = args[:] if args else attrDict.items()
    # snapshot as a list of 2-tuples (also checks that every entry is a pair)
    attrs = [(name, value) for name, value in pairs]
    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actual = tokens[attrName]
                if actual != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actual, attrValue))
    return pa
# sentinel value meaning "the attribute must be present, with any value"
withAttribute.ANY_VALUE = object()
4969
def withClass(classname, namespace=''):
    """
    Simplified version of C{L{withAttribute}} for matching on a tag's C{class}
    attribute - made difficult because C{class} is a reserved word in Python.
    If C{namespace} is given, the attribute tested is C{"<namespace>:class"}.

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        attrName = "%s:class" % namespace
    else:
        attrName = "class"
    return withAttribute(**{attrName: classname})
5004
# Associativity markers for the operator definitions passed to infixNotation.
# Each marker is a distinct plain object(), so it only equals itself.
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
5008
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions. The generated parser will also recognize the use
    of parentheses to override operator precedences (see example below).

    Note: if you define a deep operator list, you may see performance issues
    when using infixNotation. See L{ParserElement.enablePackrat} for a
    mechanism to potentially improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic element (operand) of
      the nested expression grammar
     - opList - list of tuples, one for each operator precedence level in the
      expression grammar; each tuple is of the form
      (opExpr, numTerms, rightLeftAssoc, parseAction), where:
       - opExpr is the pyparsing expression for the operator;
          may also be a string, which will be converted to a Literal;
          if numTerms is 3, opExpr is a tuple (or list) of two expressions, for the
          two operators separating the 3 terms
       - numTerms is the number of terms for this operator (must
          be 1, 2, or 3)
       - rightLeftAssoc is the indicator whether the operator is
          right or left associative, using the pyparsing-defined
          constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
       - parseAction is the parse action to be associated with
          expressions matching this operator expression (the
          parse action tuple member may be omitted); if the parse action
          is passed a tuple or list of functions, this is equivalent to
          calling C{setParseAction(*fn)} (L{ParserElement.setParseAction})
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if numTerms is 3 and opExpr is not a pair of expressions,
    if numTerms is not 1, 2 or 3, or if rightLeftAssoc is neither C{opAssoc.LEFT}
    nor C{opAssoc.RIGHT}.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    ret = Forward()
    # operand of the first level: the base expression, or a complete
    # parenthesized expression
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so that the optional parseAction member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            # validate before unpacking and formatting, so that a malformed
            # opExpr is reported by this ValueError rather than by a TypeError
            # from the string formatting
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            # 1-tuple keeps the formatting safe whatever type opExpr is; an
            # unsupported arity is rejected by the ValueError further down
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: terms are simply adjacent
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    # no operator expression: terms are simply adjacent
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction(*pa)
            else:
                matchExpr.setParseAction(pa)
        # this level matches the operator form, or else falls through to the
        # previous level; it then becomes the operand of the next level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
5120
operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

# Quoted-string expressions.  In each regex, the body of the string may contain:
#  - any character other than the quote character, a newline, a carriage
#    return or a backslash
#  - a doubled quote character ("" or '')
#  - a backslash escape: a backslash followed by any single character other
#    than 'x', or by 'x' and one or more hex digits
# The regex covers the opening quote and the body; the closing quote is added
# as a separate string literal, and the whole is wrapped in Combine.
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
# either of the two forms above ('+' binds tighter than '|', so each
# alternative is a regex followed by its own closing quote)
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
# a quoted string carrying a 'u' prefix
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
5129
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """
    Helper method for defining nested lists enclosed in opening and closing
    delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
     - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
     - content - expression for items within the nested lists (default=C{None})
     - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})

    If an expression is not provided for the content argument, the nested
    expression will capture all whitespace-delimited content between delimiters
    as a list of separate values.  In that case C{opener} and C{closer} must
    both be strings, otherwise a C{ValueError} is raised; a C{ValueError} is
    also raised if C{opener} and C{closer} are equal.

    Use the C{ignoreExpr} argument to define expressions that may contain
    opening or closing characters that should not be treated as opening
    or closing characters for nesting, such as quotedString or a comment
    expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
    The default is L{quotedString}, but if no expressions are to be ignored,
    then pass C{None} for this argument.

    Example::
        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)

    prints::
        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # a default content expression can only be derived from string delimiters
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        stripToken = lambda t: t[0].strip()
        singleCharDelims = len(opener) == 1 and len(closer) == 1

        if singleCharDelims and ignoreExpr is None:
            # simplest case: a run of characters that are neither delimiters
            # nor whitespace
            content = (empty.copy()+CharsNotIn(opener+closer+whiteChars
                        ).setParseAction(stripToken))
        else:
            # otherwise build the item one character at a time, checking the
            # negative lookaheads before each character
            if singleCharDelims:
                oneChar = ~ignoreExpr + CharsNotIn(opener+closer+whiteChars,exact=1)
            elif ignoreExpr is not None:
                oneChar = (~ignoreExpr +
                           ~Literal(opener) + ~Literal(closer) +
                           CharsNotIn(whiteChars,exact=1))
            else:
                oneChar = (~Literal(opener) + ~Literal(closer) +
                           CharsNotIn(whiteChars,exact=1))
            content = Combine(OneOrMore(oneChar)).setParseAction(stripToken)

    ret = Forward()
    # an item is an ignorable expression (when given), a nested list, or content
    if ignoreExpr is not None:
        item = ignoreExpr | ret | content
    else:
        item = ret | content
    ret <<= Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
5219
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for building an expression that matches a block of statements
    delimited by indentation, in the manner of block statements in Python
    source code.

    Parameters:
     - blockStatementExpr - expression defining the syntax of a single statement
            that is repeated within the indented block
     - indentStack - list created by the caller and used as the stack of
            indentation columns (multiple statementWithIndentedBlock expressions
            within a single grammar should share a common indentStack)
     - indent - boolean indicating whether the block must be indented beyond
            the current level; set to False for a block of left-most statements
            (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Note that C{blockStatementExpr} is modified in place: a backslash followed
    by a line end is added to its ignorable expressions.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(s,l,t):
        # a statement must start in the column currently on top of the stack
        if l >= len(s):
            return
        column = col(l,s)
        expected = indentStack[-1]
        if column == expected:
            return
        if column > expected:
            # deeper than the enclosing block without a new block being opened
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # opening a block: the column must be deeper than the current one,
        # and becomes the new top of the stack
        column = col(l,s)
        if column <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        indentStack.append( column )

    def checkUnindent(s,l,t):
        # closing a block: the column must fall back to (or left of) the
        # column of the enclosing block before the stack entry is discarded
        if l >= len(s):
            return
        column = col(l,s)
        dedented = indentStack and column < indentStack[-1] and column <= indentStack[-2]
        if not dedented:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # zero-width marker expressions; all the work happens in their parse actions
    line_end = LineEnd().setWhitespaceChars("\t ").suppress()
    NL = OneOrMore(line_end)
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')

    # one or more statements, each starting in the current block's column
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        block_body = Optional(NL) + INDENT + statements + UNDENT
    else:
        block_body = Optional(NL) + statements
    smExpr = Group( block_body )

    # allow backslash line continuations inside a statement
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
5333
# 8-bit character sets, expanded by srange from hexadecimal code ranges:
# 0xc0-0xff without 0xd7 and 0xf7 for alphas8bit, and 0xa1-0xbf plus those
# two excluded codes for punc8bit
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# open/close tag expressions whose tag name is any word of letters, digits, "_" and ":"
anyOpenTag, anyCloseTag = makeHTMLTags(
    Word(alphas, alphanums + "_:").setName('any tag')
)

# entity name -> replacement character for the entities matched by commonHTMLEntity
_htmlEntityMap = {
    "gt": '>',
    "lt": '<',
    "amp": '&',
    "nbsp": ' ',
    "quot": '"',
    "apos": '\'',
}
commonHTMLEntity = Regex(
    '&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) + ");"
).setName("common HTML entity")
def replaceHTMLEntity(t):
    """
    Helper parser action to replace common HTML entities with their special characters.

    Looks up the C{entity} named result of the parsed tokens in the module's
    entity table and returns the mapped character, or C{None} when the entity
    name is not in the table.
    """
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
5343
# Comment syntaxes are very common and easy to get subtly wrong, so the usual
# forms are provided here as ready-made expressions.
cStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'
).setName("C style comment")
"Comment of the form C{/* ... */}"

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

restOfLine = (Regex(r".*")
              .leaveWhitespace()
              .setName("rest of line"))
# a backslash immediately before the newline continues the comment onto the next line
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

cppStyleComment = Combine(
    (Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/')
    | dblSlashComment
).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"
5363
# one list item: printable words (never containing a comma), where embedded
# runs of spaces/tabs are kept only if they are not followed by a comma or
# the end of the line
_commasepitem = Combine(
    OneOrMore(
        Word(printables, excludeChars=',')
        + Optional( Word(" \t") + ~Literal(",") + ~LineEnd() )
    )
).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional( quotedString.copy() | _commasepitem, default="" )
).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or quoted strings, separated by commas.
   This expression is deprecated in favor of L{pyparsing_common.comma_separated_list}."""
5370
5371# some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
    """
    Namespace of common low-level expressions and parse actions that may be useful in jump-starting parser development:
     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sci_real>})
     - common L{programming identifiers<identifier>}
     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
     - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
     - L{UUID<uuid>}
     - L{comma-separated list<comma_separated_list>}
    Parse actions:
     - C{L{convertToInteger}}
     - C{L{convertToFloat}}
     - C{L{convertToDate}}
     - C{L{convertToDatetime}}
     - C{L{stripHTMLTags}}
     - C{L{upcaseTokens}}
     - C{L{downcaseTokens}}

    Example::
        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')
    prints::
        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action that converts parsed integer tokens to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action that converts parsed number tokens to Python float
    """

    integer = (Word(nums)
               .setName("integer")
               .setParseAction(convertToInteger))
    """expression that parses an unsigned integer, returns an int"""

    hex_integer = (Word(hexnums)
                   .setName("hex integer")
                   .setParseAction(tokenMap(int,16)))
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = (Regex(r'[+-]?\d+')
                      .setName("signed integer")
                      .setParseAction(convertToInteger))
    """expression that parses an integer with optional leading sign, returns an int"""

    # numerator and denominator are copies of signed_integer that convert to
    # float, so that the division below is a true division
    fraction = (
        signed_integer().setParseAction(convertToFloat)
        + '/'
        + signed_integer().setParseAction(convertToFloat)
    ).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    fraction.addParseAction(lambda toks: toks[0]/toks[-1])

    mixed_integer = (
        fraction
        | signed_integer + Optional(Optional('-').suppress() + fraction)
    ).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    # the integer and fraction parts are added together
    mixed_integer.addParseAction(sum)

    real = (Regex(r'[+-]?\d+\.\d*')
            .setName("real number")
            .setParseAction(convertToFloat))
    """expression that parses a floating point number and returns a float"""

    sci_real = (Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)')
                .setName("real number with scientific notation")
                .setParseAction(convertToFloat))
    """expression that parses a floating point number with optional scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = (Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?')
               .setName("fnumber")
               .setParseAction(convertToFloat))
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(
        r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}'
    ).setName("IPv4 address")
    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"

    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    _full_ipv6_address = (
        _ipv6_part + (':' + _ipv6_part)*7
    ).setName("full IPv6 address")
    _short_ipv6_address = (
        Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))
        + "::"
        + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))
    ).setName("short IPv6 address")
    # a "::"-abbreviated address must contain fewer than 8 hex groups in total
    _short_ipv6_address.addCondition(
        lambda toks: sum(1 for tok in toks if pyparsing_common._ipv6_part.matches(tok)) < 8
    )
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine(
        (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")
    ).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # the first delimiter is captured and must be repeated between all six octets
    mac_address = Regex(
        r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}'
    ).setName("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting a parsed date string to a Python datetime.date.
        The returned parse action raises L{ParseException} if the first token does not match C{fmt}.

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})

        Example::
            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))
        prints::
            [datetime.date(1999, 12, 31)]
        """
        def cvt_fn(instring, loc, toks):
            try:
                parsed = datetime.strptime(toks[0], fmt)
            except ValueError as err:
                # report a format mismatch as an ordinary parse failure
                raise ParseException(instring, loc, str(err))
            else:
                return parsed.date()
        return cvt_fn

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """
        Helper to create a parse action for converting a parsed datetime string to a Python datetime.datetime.
        The returned parse action raises L{ParseException} if the first token does not match C{fmt}.

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

        Example::
            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))
        prints::
            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def cvt_fn(instring, loc, toks):
            try:
                parsed = datetime.strptime(toks[0], fmt)
            except ValueError as err:
                # report a format mismatch as an ordinary parse failure
                raise ParseException(instring, loc, str(err))
            else:
                return parsed
        return cvt_fn

    iso8601_date = Regex(
        r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?'
    ).setName("ISO8601 date")
    "ISO8601 date (C{yyyy-mm-dd})"

    iso8601_datetime = Regex(
        r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?'
    ).setName("ISO8601 datetime")
    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

    uuid = Regex(
        r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}'
    ).setName("UUID")
    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

    # matches any opening or closing tag and drops it from transformed output
    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """
        Parse action to remove HTML tags from web page HTML source; operates on
        the first token and returns it with all opening and closing tags removed

        Example::
            # strip HTML links from normal text
            text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
            td,td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end

            print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
        """
        html_text = tokens[0]
        return pyparsing_common._html_stripper.transformString(html_text)

    _commasepitem = Combine(
        OneOrMore(
            ~Literal(",") + ~LineEnd() + Word(printables, excludeChars=',')
            + Optional( White(" \t") )
        )
    ).streamline().setName("commaItem")
    comma_separated_list = delimitedList(
        Optional( quotedString.copy() | _commasepitem, default="" )
    ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    upcaseTokens = staticmethod(tokenMap(lambda tok: _ustr(tok).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda tok: _ustr(tok).lower()))
    """Parse action to convert tokens to lower case."""
5644
5645
if __name__ == "__main__":
    # Self-demo: build a tiny SELECT-statement grammar and run it, followed by
    # a few of the pyparsing_common expressions, through runTests.

    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    # dotted names are combined into a single token and upper-cased
    columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = ('*' | columnNameList)

    tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    simpleSQL = (selectToken("command")
                 + columnSpec("columns")
                 + fromToken
                 + tableNameList("tables"))

    # demo runTests method, including embedded comments in test string
    sql_tests = """
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """
    simpleSQL.runTests(sql_tests)

    # the same inputs are fed to both numeric expressions
    numeric_tests = """
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """
    # any int or real number, returned as the appropriate type
    pyparsing_common.number.runTests(numeric_tests)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests(numeric_tests)

    hex_tests = """
        100
        FF
        """
    pyparsing_common.hex_integer.runTests(hex_tests)

    # convert matched UUID strings into uuid.UUID objects before testing
    import uuid
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    uuid_tests = """
        12345678-1234-5678-1234-567812345678
        """
    pyparsing_common.uuid.runTests(uuid_tests)
5716