1# results.py
2from collections.abc import MutableMapping, Mapping, MutableSequence, Iterator
3import pprint
4from weakref import ref as wkref
5from typing import Tuple, Any
6
# String-like types: ParseResults.__init__ wraps a value of one of these types
# as a single token, and ParseResults.from_dict does not treat them as iterable.
str_type: Tuple[type, ...] = (str, bytes)
# The generator type, obtained by taking type() of an empty generator expression;
# ParseResults.__new__ uses it to recognize generators passed in as token lists.
_generator_type = type((_ for _ in ()))
9
10
class _ParseResultsWithOffset:
    """Internal holder pairing two values (a result and its offset) in a tuple.

    The pair is kept in the single ``tup`` slot.  Indexing is forwarded to that
    tuple, so an instance can be subscripted (``obj[0]``, ``obj[1]``) and
    unpacked like a 2-tuple.
    """

    __slots__ = ["tup"]

    def __init__(self, p1, p2):
        # store both members together as one immutable pair
        self.tup = p1, p2

    def __getitem__(self, i):
        pair = self.tup
        return pair[i]

    def __getstate__(self):
        # pickle support: the pair itself is the whole state
        return self.tup

    def __setstate__(self, *args):
        # pickle support: the first positional argument is the saved pair
        restored = args[0]
        self.tup = restored
25
26
27class ParseResults:
28    """Structured parse results, to provide multiple means of access to
29    the parsed data:
30
31    - as a list (``len(results)``)
32    - by list index (``results[0], results[1]``, etc.)
33    - by attribute (``results.<results_name>`` - see :class:`ParserElement.set_results_name`)
34
35    Example::
36
37        integer = Word(nums)
38        date_str = (integer.set_results_name("year") + '/'
39                    + integer.set_results_name("month") + '/'
40                    + integer.set_results_name("day"))
41        # equivalent form:
42        # date_str = (integer("year") + '/'
43        #             + integer("month") + '/'
44        #             + integer("day"))
45
46        # parse_string returns a ParseResults object
47        result = date_str.parse_string("1999/12/31")
48
49        def test(s, fn=repr):
50            print("{} -> {}".format(s, fn(eval(s))))
51        test("list(result)")
52        test("result[0]")
53        test("result['month']")
54        test("result.day")
55        test("'month' in result")
56        test("'minutes' in result")
57        test("result.dump()", str)
58
59    prints::
60
61        list(result) -> ['1999', '/', '12', '/', '31']
62        result[0] -> '1999'
63        result['month'] -> '12'
64        result.day -> '31'
65        'month' in result -> True
66        'minutes' in result -> False
67        result.dump() -> ['1999', '/', '12', '/', '31']
68        - day: 31
69        - month: 12
70        - year: 1999
71    """
72
    # Token-list values treated as "nothing to store": __init__ only records a
    # named result when the given token list is not equal to one of these.
    _null_values: Tuple[Any, ...] = (None, [], "", ())

    # Fixed set of instance attributes.  "__weakref__" is listed so instances
    # can be weakly referenced, which __setitem__ relies on when it stores a
    # weak reference to the parent in a nested result's _parent.
    __slots__ = [
        "_name",
        "_parent",
        "_all_names",
        "_modal",
        "_toklist",
        "_tokdict",
        "__weakref__",
    ]
84
85    class List(list):
86        """
87        Simple wrapper class to distinguish parsed list results that should be preserved
88        as actual Python lists, instead of being converted to :class:`ParseResults`:
89
90            LBRACK, RBRACK = map(pp.Suppress, "[]")
91            element = pp.Forward()
92            item = ppc.integer
93            element_list = LBRACK + pp.delimited_list(element) + RBRACK
94
95            # add parse actions to convert from ParseResults to actual Python collection types
96            def as_python_list(t):
97                return pp.ParseResults.List(t.as_list())
98            element_list.add_parse_action(as_python_list)
99
100            element <<= item | element_list
101
102            element.run_tests('''
103                100
104                [2,3,4]
105                [[2, 1],3,4]
106                [(2, 1),3,4]
107                (2,3,4)
108                ''', post_parse=lambda s, r: (r[0], type(r[0])))
109
110        prints:
111
112            100
113            (100, <class 'int'>)
114
115            [2,3,4]
116            ([2, 3, 4], <class 'list'>)
117
118            [[2, 1],3,4]
119            ([[2, 1], 3, 4], <class 'list'>)
120
121        (Used internally by :class:`Group` when `aslist=True`.)
122        """
123
124        def __new__(cls, contained=None):
125            if contained is None:
126                contained = []
127
128            if not isinstance(contained, list):
129                raise TypeError(
130                    "{} may only be constructed with a list,"
131                    " not {}".format(cls.__name__, type(contained).__name__)
132                )
133
134            return list.__new__(cls)
135
136    def __new__(cls, toklist=None, name=None, **kwargs):
137        if isinstance(toklist, ParseResults):
138            return toklist
139        self = object.__new__(cls)
140        self._name = None
141        self._parent = None
142        self._all_names = set()
143
144        if toklist is None:
145            self._toklist = []
146        elif isinstance(toklist, (list, _generator_type)):
147            self._toklist = (
148                [toklist[:]]
149                if isinstance(toklist, ParseResults.List)
150                else list(toklist)
151            )
152        else:
153            self._toklist = [toklist]
154        self._tokdict = dict()
155        return self
156
157    # Performance tuning: we construct a *lot* of these, so keep this
158    # constructor as small and fast as possible
159    def __init__(
160        self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance
161    ):
162        self._modal = modal
163        if name is not None and name != "":
164            if isinstance(name, int):
165                name = str(name)
166            if not modal:
167                self._all_names = {name}
168            self._name = name
169            if toklist not in self._null_values:
170                if isinstance(toklist, (str_type, type)):
171                    toklist = [toklist]
172                if asList:
173                    if isinstance(toklist, ParseResults):
174                        self[name] = _ParseResultsWithOffset(
175                            ParseResults(toklist._toklist), 0
176                        )
177                    else:
178                        self[name] = _ParseResultsWithOffset(
179                            ParseResults(toklist[0]), 0
180                        )
181                    self[name]._name = name
182                else:
183                    try:
184                        self[name] = toklist[0]
185                    except (KeyError, TypeError, IndexError):
186                        if toklist is not self:
187                            self[name] = toklist
188                        else:
189                            self._name = name
190
191    def __getitem__(self, i):
192        if isinstance(i, (int, slice)):
193            return self._toklist[i]
194        else:
195            if i not in self._all_names:
196                return self._tokdict[i][-1][0]
197            else:
198                return ParseResults([v[0] for v in self._tokdict[i]])
199
200    def __setitem__(self, k, v, isinstance=isinstance):
201        if isinstance(v, _ParseResultsWithOffset):
202            self._tokdict[k] = self._tokdict.get(k, list()) + [v]
203            sub = v[0]
204        elif isinstance(k, (int, slice)):
205            self._toklist[k] = v
206            sub = v
207        else:
208            self._tokdict[k] = self._tokdict.get(k, list()) + [
209                _ParseResultsWithOffset(v, 0)
210            ]
211            sub = v
212        if isinstance(sub, ParseResults):
213            sub._parent = wkref(self)
214
215    def __delitem__(self, i):
216        if isinstance(i, (int, slice)):
217            mylen = len(self._toklist)
218            del self._toklist[i]
219
220            # convert int to slice
221            if isinstance(i, int):
222                if i < 0:
223                    i += mylen
224                i = slice(i, i + 1)
225            # get removed indices
226            removed = list(range(*i.indices(mylen)))
227            removed.reverse()
228            # fixup indices in token dictionary
229            for name, occurrences in self._tokdict.items():
230                for j in removed:
231                    for k, (value, position) in enumerate(occurrences):
232                        occurrences[k] = _ParseResultsWithOffset(
233                            value, position - (position > j)
234                        )
235        else:
236            del self._tokdict[i]
237
238    def __contains__(self, k) -> bool:
239        return k in self._tokdict
240
241    def __len__(self) -> int:
242        return len(self._toklist)
243
244    def __bool__(self) -> bool:
245        return not not (self._toklist or self._tokdict)
246
247    def __iter__(self) -> Iterator:
248        return iter(self._toklist)
249
250    def __reversed__(self) -> Iterator:
251        return iter(self._toklist[::-1])
252
253    def keys(self):
254        return iter(self._tokdict)
255
256    def values(self):
257        return (self[k] for k in self.keys())
258
259    def items(self):
260        return ((k, self[k]) for k in self.keys())
261
262    def haskeys(self) -> bool:
263        """
264        Since ``keys()`` returns an iterator, this method is helpful in bypassing
265        code that looks for the existence of any defined results names."""
266        return bool(self._tokdict)
267
268    def pop(self, *args, **kwargs):
269        """
270        Removes and returns item at specified index (default= ``last``).
271        Supports both ``list`` and ``dict`` semantics for ``pop()``. If
272        passed no argument or an integer argument, it will use ``list``
273        semantics and pop tokens from the list of parsed tokens. If passed
274        a non-integer argument (most likely a string), it will use ``dict``
275        semantics and pop the corresponding value from any defined results
276        names. A second default return value argument is supported, just as in
277        ``dict.pop()``.
278
279        Example::
280
281            numlist = Word(nums)[...]
282            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']
283
284            def remove_first(tokens):
285                tokens.pop(0)
286            numlist.add_parse_action(remove_first)
287            print(numlist.parse_string("0 123 321")) # -> ['123', '321']
288
289            label = Word(alphas)
290            patt = label("LABEL") + OneOrMore(Word(nums))
291            print(patt.parse_string("AAB 123 321").dump())
292
293            # Use pop() in a parse action to remove named result (note that corresponding value is not
294            # removed from list form of results)
295            def remove_LABEL(tokens):
296                tokens.pop("LABEL")
297                return tokens
298            patt.add_parse_action(remove_LABEL)
299            print(patt.parse_string("AAB 123 321").dump())
300
301        prints::
302
303            ['AAB', '123', '321']
304            - LABEL: AAB
305
306            ['AAB', '123', '321']
307        """
308        if not args:
309            args = [-1]
310        for k, v in kwargs.items():
311            if k == "default":
312                args = (args[0], v)
313            else:
314                raise TypeError(
315                    "pop() got an unexpected keyword argument {!r}".format(k)
316                )
317        if isinstance(args[0], int) or len(args) == 1 or args[0] in self:
318            index = args[0]
319            ret = self[index]
320            del self[index]
321            return ret
322        else:
323            defaultvalue = args[1]
324            return defaultvalue
325
326    def get(self, key, default_value=None):
327        """
328        Returns named result matching the given key, or if there is no
329        such name, then returns the given ``default_value`` or ``None`` if no
330        ``default_value`` is specified.
331
332        Similar to ``dict.get()``.
333
334        Example::
335
336            integer = Word(nums)
337            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
338
339            result = date_str.parse_string("1999/12/31")
340            print(result.get("year")) # -> '1999'
341            print(result.get("hour", "not specified")) # -> 'not specified'
342            print(result.get("hour")) # -> None
343        """
344        if key in self:
345            return self[key]
346        else:
347            return default_value
348
349    def insert(self, index, ins_string):
350        """
351        Inserts new element at location index in the list of parsed tokens.
352
353        Similar to ``list.insert()``.
354
355        Example::
356
357            numlist = Word(nums)[...]
358            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']
359
360            # use a parse action to insert the parse location in the front of the parsed results
361            def insert_locn(locn, tokens):
362                tokens.insert(0, locn)
363            numlist.add_parse_action(insert_locn)
364            print(numlist.parse_string("0 123 321")) # -> [0, '0', '123', '321']
365        """
366        self._toklist.insert(index, ins_string)
367        # fixup indices in token dictionary
368        for name, occurrences in self._tokdict.items():
369            for k, (value, position) in enumerate(occurrences):
370                occurrences[k] = _ParseResultsWithOffset(
371                    value, position + (position > index)
372                )
373
374    def append(self, item):
375        """
376        Add single element to end of ``ParseResults`` list of elements.
377
378        Example::
379
380            numlist = Word(nums)[...]
381            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']
382
383            # use a parse action to compute the sum of the parsed integers, and add it to the end
384            def append_sum(tokens):
385                tokens.append(sum(map(int, tokens)))
386            numlist.add_parse_action(append_sum)
387            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321', 444]
388        """
389        self._toklist.append(item)
390
391    def extend(self, itemseq):
392        """
393        Add sequence of elements to end of ``ParseResults`` list of elements.
394
395        Example::
396
397            patt = OneOrMore(Word(alphas))
398
399            # use a parse action to append the reverse of the matched strings, to make a palindrome
400            def make_palindrome(tokens):
401                tokens.extend(reversed([t[::-1] for t in tokens]))
402                return ''.join(tokens)
403            patt.add_parse_action(make_palindrome)
404            print(patt.parse_string("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
405        """
406        if isinstance(itemseq, ParseResults):
407            self.__iadd__(itemseq)
408        else:
409            self._toklist.extend(itemseq)
410
411    def clear(self):
412        """
413        Clear all elements and results names.
414        """
415        del self._toklist[:]
416        self._tokdict.clear()
417
418    def __getattr__(self, name):
419        try:
420            return self[name]
421        except KeyError:
422            if name.startswith("__"):
423                raise AttributeError(name)
424            return ""
425
426    def __add__(self, other) -> "ParseResults":
427        ret = self.copy()
428        ret += other
429        return ret
430
431    def __iadd__(self, other) -> "ParseResults":
432        if other._tokdict:
433            offset = len(self._toklist)
434            addoffset = lambda a: offset if a < 0 else a + offset
435            otheritems = other._tokdict.items()
436            otherdictitems = [
437                (k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
438                for k, vlist in otheritems
439                for v in vlist
440            ]
441            for k, v in otherdictitems:
442                self[k] = v
443                if isinstance(v[0], ParseResults):
444                    v[0]._parent = wkref(self)
445
446        self._toklist += other._toklist
447        self._all_names |= other._all_names
448        return self
449
450    def __radd__(self, other) -> "ParseResults":
451        if isinstance(other, int) and other == 0:
452            # useful for merging many ParseResults using sum() builtin
453            return self.copy()
454        else:
455            # this may raise a TypeError - so be it
456            return other + self
457
458    def __repr__(self) -> str:
459        return "{}({!r}, {})".format(type(self).__name__, self._toklist, self.as_dict())
460
461    def __str__(self) -> str:
462        return (
463            "["
464            + ", ".join(
465                str(i) if isinstance(i, ParseResults) else repr(i)
466                for i in self._toklist
467            )
468            + "]"
469        )
470
471    def _asStringList(self, sep=""):
472        out = []
473        for item in self._toklist:
474            if out and sep:
475                out.append(sep)
476            if isinstance(item, ParseResults):
477                out += item._asStringList()
478            else:
479                out.append(str(item))
480        return out
481
482    def as_list(self) -> list:
483        """
484        Returns the parse results as a nested list of matching tokens, all converted to strings.
485
486        Example::
487
488            patt = OneOrMore(Word(alphas))
489            result = patt.parse_string("sldkj lsdkj sldkj")
490            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
491            print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
492
493            # Use as_list() to create an actual list
494            result_list = result.as_list()
495            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
496        """
497        return [
498            res.as_list() if isinstance(res, ParseResults) else res
499            for res in self._toklist
500        ]
501
502    def as_dict(self) -> dict:
503        """
504        Returns the named parse results as a nested dictionary.
505
506        Example::
507
508            integer = Word(nums)
509            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
510
511            result = date_str.parse_string('12/31/1999')
512            print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
513
514            result_dict = result.as_dict()
515            print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
516
517            # even though a ParseResults supports dict-like access, sometime you just need to have a dict
518            import json
519            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
520            print(json.dumps(result.as_dict())) # -> {"month": "31", "day": "1999", "year": "12"}
521        """
522
523        def to_item(obj):
524            if isinstance(obj, ParseResults):
525                return obj.as_dict() if obj.haskeys() else [to_item(v) for v in obj]
526            else:
527                return obj
528
529        return dict((k, to_item(v)) for k, v in self.items())
530
531    def copy(self) -> "ParseResults":
532        """
533        Returns a new copy of a :class:`ParseResults` object.
534        """
535        ret = ParseResults(self._toklist)
536        ret._tokdict = self._tokdict.copy()
537        ret._parent = self._parent
538        ret._all_names |= self._all_names
539        ret._name = self._name
540        return ret
541
542    def get_name(self):
543        r"""
544        Returns the results name for this token expression. Useful when several
545        different expressions might match at a particular location.
546
547        Example::
548
549            integer = Word(nums)
550            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
551            house_number_expr = Suppress('#') + Word(nums, alphanums)
552            user_data = (Group(house_number_expr)("house_number")
553                        | Group(ssn_expr)("ssn")
554                        | Group(integer)("age"))
555            user_info = OneOrMore(user_data)
556
557            result = user_info.parse_string("22 111-22-3333 #221B")
558            for item in result:
559                print(item.get_name(), ':', item[0])
560
561        prints::
562
563            age : 22
564            ssn : 111-22-3333
565            house_number : 221B
566        """
567        if self._name:
568            return self._name
569        elif self._parent:
570            par = self._parent()
571
572            def find_in_parent(sub):
573                return next(
574                    (
575                        k
576                        for k, vlist in par._tokdict.items()
577                        for v, loc in vlist
578                        if sub is v
579                    ),
580                    None,
581                )
582
583            return find_in_parent(self) if par else None
584        elif (
585            len(self) == 1
586            and len(self._tokdict) == 1
587            and next(iter(self._tokdict.values()))[0][1] in (0, -1)
588        ):
589            return next(iter(self._tokdict.keys()))
590        else:
591            return None
592
593    def dump(self, indent="", full=True, include_list=True, _depth=0) -> str:
594        """
595        Diagnostic method for listing out the contents of
596        a :class:`ParseResults`. Accepts an optional ``indent`` argument so
597        that this string can be embedded in a nested display of other data.
598
599        Example::
600
601            integer = Word(nums)
602            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
603
604            result = date_str.parse_string('12/31/1999')
605            print(result.dump())
606
607        prints::
608
609            ['12', '/', '31', '/', '1999']
610            - day: 1999
611            - month: 31
612            - year: 12
613        """
614        out = []
615        NL = "\n"
616        out.append(indent + str(self.as_list()) if include_list else "")
617
618        if full:
619            if self.haskeys():
620                items = sorted((str(k), v) for k, v in self.items())
621                for k, v in items:
622                    if out:
623                        out.append(NL)
624                    out.append("{}{}- {}: ".format(indent, ("  " * _depth), k))
625                    if isinstance(v, ParseResults):
626                        if v:
627                            out.append(
628                                v.dump(
629                                    indent=indent,
630                                    full=full,
631                                    include_list=include_list,
632                                    _depth=_depth + 1,
633                                )
634                            )
635                        else:
636                            out.append(str(v))
637                    else:
638                        out.append(repr(v))
639            if any(isinstance(vv, ParseResults) for vv in self):
640                v = self
641                for i, vv in enumerate(v):
642                    if isinstance(vv, ParseResults):
643                        out.append(
644                            "\n{}{}[{}]:\n{}{}{}".format(
645                                indent,
646                                ("  " * (_depth)),
647                                i,
648                                indent,
649                                ("  " * (_depth + 1)),
650                                vv.dump(
651                                    indent=indent,
652                                    full=full,
653                                    include_list=include_list,
654                                    _depth=_depth + 1,
655                                ),
656                            )
657                        )
658                    else:
659                        out.append(
660                            "\n%s%s[%d]:\n%s%s%s"
661                            % (
662                                indent,
663                                ("  " * (_depth)),
664                                i,
665                                indent,
666                                ("  " * (_depth + 1)),
667                                str(vv),
668                            )
669                        )
670
671        return "".join(out)
672
673    def pprint(self, *args, **kwargs):
674        """
675        Pretty-printer for parsed results as a list, using the
676        `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
677        Accepts additional positional or keyword args as defined for
678        `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .
679
680        Example::
681
682            ident = Word(alphas, alphanums)
683            num = Word(nums)
684            func = Forward()
685            term = ident | num | Group('(' + func + ')')
686            func <<= ident + Group(Optional(delimited_list(term)))
687            result = func.parse_string("fna a,b,(fnb c,d,200),100")
688            result.pprint(width=40)
689
690        prints::
691
692            ['fna',
693             ['a',
694              'b',
695              ['(', 'fnb', ['c', 'd', '200'], ')'],
696              '100']]
697        """
698        pprint.pprint(self.as_list(), *args, **kwargs)
699
700    # add support for pickle protocol
701    def __getstate__(self):
702        return (
703            self._toklist,
704            (
705                self._tokdict.copy(),
706                self._parent is not None and self._parent() or None,
707                self._all_names,
708                self._name,
709            ),
710        )
711
712    def __setstate__(self, state):
713        self._toklist, (self._tokdict, par, inAccumNames, self._name) = state
714        self._all_names = set(inAccumNames)
715        if par is not None:
716            self._parent = wkref(par)
717        else:
718            self._parent = None
719
720    def __getnewargs__(self):
721        return self._toklist, self._name
722
723    def __dir__(self):
724        return dir(type(self)) + list(self.keys())
725
726    @classmethod
727    def from_dict(cls, other, name=None) -> "ParseResults":
728        """
729        Helper classmethod to construct a ``ParseResults`` from a ``dict``, preserving the
730        name-value relations as results names. If an optional ``name`` argument is
731        given, a nested ``ParseResults`` will be returned.
732        """
733
734        def is_iterable(obj):
735            try:
736                iter(obj)
737            except Exception:
738                return False
739            else:
740                return not isinstance(obj, str_type)
741
742        ret = cls([])
743        for k, v in other.items():
744            if isinstance(v, Mapping):
745                ret += cls.from_dict(v, name=k)
746            else:
747                ret += cls([v], name=k, asList=is_iterable(v))
748        if name is not None:
749            ret = cls([ret], name=name)
750        return ret
751
    # camelCase spellings bound to the same function objects as the
    # snake_case methods as_list, as_dict and get_name.
    asList = as_list
    asDict = as_dict
    getName = get_name
755
756
# Register ParseResults as a virtual subclass of both ABCs, so isinstance() and
# issubclass() checks against MutableMapping and MutableSequence succeed even
# though ParseResults does not inherit from either of them.
MutableMapping.register(ParseResults)
MutableSequence.register(ParseResults)
759