1# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""
5Python parse tree definitions.
6
7This is a very concrete parse tree; we need to keep every token and
8even the comments and whitespace between tokens.
9
10There's also a pattern matching implementation here.
11"""
12
13__author__ = "Guido van Rossum <guido@python.org>"
14
15import sys
16from io import StringIO
17
18HUGE = 0x7FFFFFFF  # maximum repeat count, default max
19
20_type_reprs = {}
21def type_repr(type_num):
22    global _type_reprs
23    if not _type_reprs:
24        from .pygram import python_symbols
25        # printing tokens is possible but not as useful
26        # from .pgen2 import token // token.__dict__.items():
27        for name, val in python_symbols.__dict__.items():
28            if type(val) == int: _type_reprs[val] = name
29    return _type_reprs.setdefault(type_num, type_num)
30
31class Base(object):
32
33    """
34    Abstract base class for Node and Leaf.
35
36    This provides some default functionality and boilerplate using the
37    template pattern.
38
39    A node may be a subnode of at most one parent.
40    """
41
42    # Default values for instance variables
43    type = None    # int: token number (< 256) or symbol number (>= 256)
44    parent = None  # Parent node pointer, or None
45    children = ()  # Tuple of subnodes
46    was_changed = False
47    was_checked = False
48
49    def __new__(cls, *args, **kwds):
50        """Constructor that prevents Base from being instantiated."""
51        assert cls is not Base, "Cannot instantiate Base"
52        return object.__new__(cls)
53
54    def __eq__(self, other):
55        """
56        Compare two nodes for equality.
57
58        This calls the method _eq().
59        """
60        if self.__class__ is not other.__class__:
61            return NotImplemented
62        return self._eq(other)
63
64    __hash__ = None # For Py3 compatibility.
65
66    def _eq(self, other):
67        """
68        Compare two nodes for equality.
69
70        This is called by __eq__ and __ne__.  It is only called if the two nodes
71        have the same type.  This must be implemented by the concrete subclass.
72        Nodes should be considered equal if they have the same structure,
73        ignoring the prefix string and other context information.
74        """
75        raise NotImplementedError
76
77    def clone(self):
78        """
79        Return a cloned (deep) copy of self.
80
81        This must be implemented by the concrete subclass.
82        """
83        raise NotImplementedError
84
85    def post_order(self):
86        """
87        Return a post-order iterator for the tree.
88
89        This must be implemented by the concrete subclass.
90        """
91        raise NotImplementedError
92
93    def pre_order(self):
94        """
95        Return a pre-order iterator for the tree.
96
97        This must be implemented by the concrete subclass.
98        """
99        raise NotImplementedError
100
101    def replace(self, new):
102        """Replace this node with a new one in the parent."""
103        assert self.parent is not None, str(self)
104        assert new is not None
105        if not isinstance(new, list):
106            new = [new]
107        l_children = []
108        found = False
109        for ch in self.parent.children:
110            if ch is self:
111                assert not found, (self.parent.children, self, new)
112                if new is not None:
113                    l_children.extend(new)
114                found = True
115            else:
116                l_children.append(ch)
117        assert found, (self.children, self, new)
118        self.parent.changed()
119        self.parent.children = l_children
120        for x in new:
121            x.parent = self.parent
122        self.parent = None
123
124    def get_lineno(self):
125        """Return the line number which generated the invocant node."""
126        node = self
127        while not isinstance(node, Leaf):
128            if not node.children:
129                return
130            node = node.children[0]
131        return node.lineno
132
133    def changed(self):
134        if self.parent:
135            self.parent.changed()
136        self.was_changed = True
137
138    def remove(self):
139        """
140        Remove the node from the tree. Returns the position of the node in its
141        parent's children before it was removed.
142        """
143        if self.parent:
144            for i, node in enumerate(self.parent.children):
145                if node is self:
146                    self.parent.changed()
147                    del self.parent.children[i]
148                    self.parent = None
149                    return i
150
151    @property
152    def next_sibling(self):
153        """
154        The node immediately following the invocant in their parent's children
155        list. If the invocant does not have a next sibling, it is None
156        """
157        if self.parent is None:
158            return None
159
160        # Can't use index(); we need to test by identity
161        for i, child in enumerate(self.parent.children):
162            if child is self:
163                try:
164                    return self.parent.children[i+1]
165                except IndexError:
166                    return None
167
168    @property
169    def prev_sibling(self):
170        """
171        The node immediately preceding the invocant in their parent's children
172        list. If the invocant does not have a previous sibling, it is None.
173        """
174        if self.parent is None:
175            return None
176
177        # Can't use index(); we need to test by identity
178        for i, child in enumerate(self.parent.children):
179            if child is self:
180                if i == 0:
181                    return None
182                return self.parent.children[i-1]
183
184    def leaves(self):
185        for child in self.children:
186            yield from child.leaves()
187
188    def depth(self):
189        if self.parent is None:
190            return 0
191        return 1 + self.parent.depth()
192
193    def get_suffix(self):
194        """
195        Return the string immediately following the invocant node. This is
196        effectively equivalent to node.next_sibling.prefix
197        """
198        next_sib = self.next_sibling
199        if next_sib is None:
200            return ""
201        return next_sib.prefix
202
203    if sys.version_info < (3, 0):
204        def __str__(self):
205            return str(self).encode("ascii")
206
207class Node(Base):
208
209    """Concrete implementation for interior nodes."""
210
211    def __init__(self,type, children,
212                 context=None,
213                 prefix=None,
214                 fixers_applied=None):
215        """
216        Initializer.
217
218        Takes a type constant (a symbol number >= 256), a sequence of
219        child nodes, and an optional context keyword argument.
220
221        As a side effect, the parent pointers of the children are updated.
222        """
223        assert type >= 256, type
224        self.type = type
225        self.children = list(children)
226        for ch in self.children:
227            assert ch.parent is None, repr(ch)
228            ch.parent = self
229        if prefix is not None:
230            self.prefix = prefix
231        if fixers_applied:
232            self.fixers_applied = fixers_applied[:]
233        else:
234            self.fixers_applied = None
235
236    def __repr__(self):
237        """Return a canonical string representation."""
238        return "%s(%s, %r)" % (self.__class__.__name__,
239                               type_repr(self.type),
240                               self.children)
241
242    def __unicode__(self):
243        """
244        Return a pretty string representation.
245
246        This reproduces the input source exactly.
247        """
248        return "".join(map(str, self.children))
249
250    if sys.version_info > (3, 0):
251        __str__ = __unicode__
252
253    def _eq(self, other):
254        """Compare two nodes for equality."""
255        return (self.type, self.children) == (other.type, other.children)
256
257    def clone(self):
258        """Return a cloned (deep) copy of self."""
259        return Node(self.type, [ch.clone() for ch in self.children],
260                    fixers_applied=self.fixers_applied)
261
262    def post_order(self):
263        """Return a post-order iterator for the tree."""
264        for child in self.children:
265            yield from child.post_order()
266        yield self
267
268    def pre_order(self):
269        """Return a pre-order iterator for the tree."""
270        yield self
271        for child in self.children:
272            yield from child.pre_order()
273
274    @property
275    def prefix(self):
276        """
277        The whitespace and comments preceding this node in the input.
278        """
279        if not self.children:
280            return ""
281        return self.children[0].prefix
282
283    @prefix.setter
284    def prefix(self, prefix):
285        if self.children:
286            self.children[0].prefix = prefix
287
288    def set_child(self, i, child):
289        """
290        Equivalent to 'node.children[i] = child'. This method also sets the
291        child's parent attribute appropriately.
292        """
293        child.parent = self
294        self.children[i].parent = None
295        self.children[i] = child
296        self.changed()
297
298    def insert_child(self, i, child):
299        """
300        Equivalent to 'node.children.insert(i, child)'. This method also sets
301        the child's parent attribute appropriately.
302        """
303        child.parent = self
304        self.children.insert(i, child)
305        self.changed()
306
307    def append_child(self, child):
308        """
309        Equivalent to 'node.children.append(child)'. This method also sets the
310        child's parent attribute appropriately.
311        """
312        child.parent = self
313        self.children.append(child)
314        self.changed()
315
316
317class Leaf(Base):
318
319    """Concrete implementation for leaf nodes."""
320
321    # Default values for instance variables
322    _prefix = ""  # Whitespace and comments preceding this token in the input
323    lineno = 0    # Line where this token starts in the input
324    column = 0    # Column where this token tarts in the input
325
326    def __init__(self, type, value,
327                 context=None,
328                 prefix=None,
329                 fixers_applied=[]):
330        """
331        Initializer.
332
333        Takes a type constant (a token number < 256), a string value, and an
334        optional context keyword argument.
335        """
336        assert 0 <= type < 256, type
337        if context is not None:
338            self._prefix, (self.lineno, self.column) = context
339        self.type = type
340        self.value = value
341        if prefix is not None:
342            self._prefix = prefix
343        self.fixers_applied = fixers_applied[:]
344
345    def __repr__(self):
346        """Return a canonical string representation."""
347        return "%s(%r, %r)" % (self.__class__.__name__,
348                               self.type,
349                               self.value)
350
351    def __unicode__(self):
352        """
353        Return a pretty string representation.
354
355        This reproduces the input source exactly.
356        """
357        return self.prefix + str(self.value)
358
359    if sys.version_info > (3, 0):
360        __str__ = __unicode__
361
362    def _eq(self, other):
363        """Compare two nodes for equality."""
364        return (self.type, self.value) == (other.type, other.value)
365
366    def clone(self):
367        """Return a cloned (deep) copy of self."""
368        return Leaf(self.type, self.value,
369                    (self.prefix, (self.lineno, self.column)),
370                    fixers_applied=self.fixers_applied)
371
372    def leaves(self):
373        yield self
374
375    def post_order(self):
376        """Return a post-order iterator for the tree."""
377        yield self
378
379    def pre_order(self):
380        """Return a pre-order iterator for the tree."""
381        yield self
382
383    @property
384    def prefix(self):
385        """
386        The whitespace and comments preceding this token in the input.
387        """
388        return self._prefix
389
390    @prefix.setter
391    def prefix(self, prefix):
392        self.changed()
393        self._prefix = prefix
394
395def convert(gr, raw_node):
396    """
397    Convert raw node information to a Node or Leaf instance.
398
399    This is passed to the parser driver which calls it whenever a reduction of a
400    grammar rule produces a new complete node, so that the tree is build
401    strictly bottom-up.
402    """
403    type, value, context, children = raw_node
404    if children or type in gr.number2symbol:
405        # If there's exactly one child, return that child instead of
406        # creating a new node.
407        if len(children) == 1:
408            return children[0]
409        return Node(type, children, context=context)
410    else:
411        return Leaf(type, value, context=context)
412
413
414class BasePattern(object):
415
416    """
417    A pattern is a tree matching pattern.
418
419    It looks for a specific node type (token or symbol), and
420    optionally for a specific content.
421
422    This is an abstract base class.  There are three concrete
423    subclasses:
424
425    - LeafPattern matches a single leaf node;
426    - NodePattern matches a single node (usually non-leaf);
427    - WildcardPattern matches a sequence of nodes of variable length.
428    """
429
430    # Defaults for instance variables
431    type = None     # Node type (token if < 256, symbol if >= 256)
432    content = None  # Optional content matching pattern
433    name = None     # Optional name used to store match in results dict
434
435    def __new__(cls, *args, **kwds):
436        """Constructor that prevents BasePattern from being instantiated."""
437        assert cls is not BasePattern, "Cannot instantiate BasePattern"
438        return object.__new__(cls)
439
440    def __repr__(self):
441        args = [type_repr(self.type), self.content, self.name]
442        while args and args[-1] is None:
443            del args[-1]
444        return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
445
446    def optimize(self):
447        """
448        A subclass can define this as a hook for optimizations.
449
450        Returns either self or another node with the same effect.
451        """
452        return self
453
454    def match(self, node, results=None):
455        """
456        Does this pattern exactly match a node?
457
458        Returns True if it matches, False if not.
459
460        If results is not None, it must be a dict which will be
461        updated with the nodes matching named subpatterns.
462
463        Default implementation for non-wildcard patterns.
464        """
465        if self.type is not None and node.type != self.type:
466            return False
467        if self.content is not None:
468            r = None
469            if results is not None:
470                r = {}
471            if not self._submatch(node, r):
472                return False
473            if r:
474                results.update(r)
475        if results is not None and self.name:
476            results[self.name] = node
477        return True
478
479    def match_seq(self, nodes, results=None):
480        """
481        Does this pattern exactly match a sequence of nodes?
482
483        Default implementation for non-wildcard patterns.
484        """
485        if len(nodes) != 1:
486            return False
487        return self.match(nodes[0], results)
488
489    def generate_matches(self, nodes):
490        """
491        Generator yielding all matches for this pattern.
492
493        Default implementation for non-wildcard patterns.
494        """
495        r = {}
496        if nodes and self.match(nodes[0], r):
497            yield 1, r
498
499
500class LeafPattern(BasePattern):
501
502    def __init__(self, type=None, content=None, name=None):
503        """
504        Initializer.  Takes optional type, content, and name.
505
506        The type, if given must be a token type (< 256).  If not given,
507        this matches any *leaf* node; the content may still be required.
508
509        The content, if given, must be a string.
510
511        If a name is given, the matching node is stored in the results
512        dict under that key.
513        """
514        if type is not None:
515            assert 0 <= type < 256, type
516        if content is not None:
517            assert isinstance(content, str), repr(content)
518        self.type = type
519        self.content = content
520        self.name = name
521
522    def match(self, node, results=None):
523        """Override match() to insist on a leaf node."""
524        if not isinstance(node, Leaf):
525            return False
526        return BasePattern.match(self, node, results)
527
528    def _submatch(self, node, results=None):
529        """
530        Match the pattern's content to the node's children.
531
532        This assumes the node type matches and self.content is not None.
533
534        Returns True if it matches, False if not.
535
536        If results is not None, it must be a dict which will be
537        updated with the nodes matching named subpatterns.
538
539        When returning False, the results dict may still be updated.
540        """
541        return self.content == node.value
542
543
544class NodePattern(BasePattern):
545
546    wildcards = False
547
548    def __init__(self, type=None, content=None, name=None):
549        """
550        Initializer.  Takes optional type, content, and name.
551
552        The type, if given, must be a symbol type (>= 256).  If the
553        type is None this matches *any* single node (leaf or not),
554        except if content is not None, in which it only matches
555        non-leaf nodes that also match the content pattern.
556
557        The content, if not None, must be a sequence of Patterns that
558        must match the node's children exactly.  If the content is
559        given, the type must not be None.
560
561        If a name is given, the matching node is stored in the results
562        dict under that key.
563        """
564        if type is not None:
565            assert type >= 256, type
566        if content is not None:
567            assert not isinstance(content, str), repr(content)
568            content = list(content)
569            for i, item in enumerate(content):
570                assert isinstance(item, BasePattern), (i, item)
571                if isinstance(item, WildcardPattern):
572                    self.wildcards = True
573        self.type = type
574        self.content = content
575        self.name = name
576
577    def _submatch(self, node, results=None):
578        """
579        Match the pattern's content to the node's children.
580
581        This assumes the node type matches and self.content is not None.
582
583        Returns True if it matches, False if not.
584
585        If results is not None, it must be a dict which will be
586        updated with the nodes matching named subpatterns.
587
588        When returning False, the results dict may still be updated.
589        """
590        if self.wildcards:
591            for c, r in generate_matches(self.content, node.children):
592                if c == len(node.children):
593                    if results is not None:
594                        results.update(r)
595                    return True
596            return False
597        if len(self.content) != len(node.children):
598            return False
599        for subpattern, child in zip(self.content, node.children):
600            if not subpattern.match(child, results):
601                return False
602        return True
603
604
605class WildcardPattern(BasePattern):
606
607    """
608    A wildcard pattern can match zero or more nodes.
609
610    This has all the flexibility needed to implement patterns like:
611
612    .*      .+      .?      .{m,n}
613    (a b c | d e | f)
614    (...)*  (...)+  (...)?  (...){m,n}
615
616    except it always uses non-greedy matching.
617    """
618
619    def __init__(self, content=None, min=0, max=HUGE, name=None):
620        """
621        Initializer.
622
623        Args:
624            content: optional sequence of subsequences of patterns;
625                     if absent, matches one node;
626                     if present, each subsequence is an alternative [*]
627            min: optional minimum number of times to match, default 0
628            max: optional maximum number of times to match, default HUGE
629            name: optional name assigned to this match
630
631        [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
632            equivalent to (a b c | d e | f g h); if content is None,
633            this is equivalent to '.' in regular expression terms.
634            The min and max parameters work as follows:
635                min=0, max=maxint: .*
636                min=1, max=maxint: .+
637                min=0, max=1: .?
638                min=1, max=1: .
639            If content is not None, replace the dot with the parenthesized
640            list of alternatives, e.g. (a b c | d e | f g h)*
641        """
642        assert 0 <= min <= max <= HUGE, (min, max)
643        if content is not None:
644            content = tuple(map(tuple, content))  # Protect against alterations
645            # Check sanity of alternatives
646            assert len(content), repr(content)  # Can't have zero alternatives
647            for alt in content:
648                assert len(alt), repr(alt) # Can have empty alternatives
649        self.content = content
650        self.min = min
651        self.max = max
652        self.name = name
653
654    def optimize(self):
655        """Optimize certain stacked wildcard patterns."""
656        subpattern = None
657        if (self.content is not None and
658            len(self.content) == 1 and len(self.content[0]) == 1):
659            subpattern = self.content[0][0]
660        if self.min == 1 and self.max == 1:
661            if self.content is None:
662                return NodePattern(name=self.name)
663            if subpattern is not None and  self.name == subpattern.name:
664                return subpattern.optimize()
665        if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
666            subpattern.min <= 1 and self.name == subpattern.name):
667            return WildcardPattern(subpattern.content,
668                                   self.min*subpattern.min,
669                                   self.max*subpattern.max,
670                                   subpattern.name)
671        return self
672
673    def match(self, node, results=None):
674        """Does this pattern exactly match a node?"""
675        return self.match_seq([node], results)
676
677    def match_seq(self, nodes, results=None):
678        """Does this pattern exactly match a sequence of nodes?"""
679        for c, r in self.generate_matches(nodes):
680            if c == len(nodes):
681                if results is not None:
682                    results.update(r)
683                    if self.name:
684                        results[self.name] = list(nodes)
685                return True
686        return False
687
688    def generate_matches(self, nodes):
689        """
690        Generator yielding matches for a sequence of nodes.
691
692        Args:
693            nodes: sequence of nodes
694
695        Yields:
696            (count, results) tuples where:
697            count: the match comprises nodes[:count];
698            results: dict containing named submatches.
699        """
700        if self.content is None:
701            # Shortcut for special case (see __init__.__doc__)
702            for count in range(self.min, 1 + min(len(nodes), self.max)):
703                r = {}
704                if self.name:
705                    r[self.name] = nodes[:count]
706                yield count, r
707        elif self.name == "bare_name":
708            yield self._bare_name_matches(nodes)
709        else:
710            # The reason for this is that hitting the recursion limit usually
711            # results in some ugly messages about how RuntimeErrors are being
712            # ignored. We only have to do this on CPython, though, because other
713            # implementations don't have this nasty bug in the first place.
714            if hasattr(sys, "getrefcount"):
715                save_stderr = sys.stderr
716                sys.stderr = StringIO()
717            try:
718                for count, r in self._recursive_matches(nodes, 0):
719                    if self.name:
720                        r[self.name] = nodes[:count]
721                    yield count, r
722            except RuntimeError:
723                # We fall back to the iterative pattern matching scheme if the recursive
724                # scheme hits the recursion limit.
725                for count, r in self._iterative_matches(nodes):
726                    if self.name:
727                        r[self.name] = nodes[:count]
728                    yield count, r
729            finally:
730                if hasattr(sys, "getrefcount"):
731                    sys.stderr = save_stderr
732
733    def _iterative_matches(self, nodes):
734        """Helper to iteratively yield the matches."""
735        nodelen = len(nodes)
736        if 0 >= self.min:
737            yield 0, {}
738
739        results = []
740        # generate matches that use just one alt from self.content
741        for alt in self.content:
742            for c, r in generate_matches(alt, nodes):
743                yield c, r
744                results.append((c, r))
745
746        # for each match, iterate down the nodes
747        while results:
748            new_results = []
749            for c0, r0 in results:
750                # stop if the entire set of nodes has been matched
751                if c0 < nodelen and c0 <= self.max:
752                    for alt in self.content:
753                        for c1, r1 in generate_matches(alt, nodes[c0:]):
754                            if c1 > 0:
755                                r = {}
756                                r.update(r0)
757                                r.update(r1)
758                                yield c0 + c1, r
759                                new_results.append((c0 + c1, r))
760            results = new_results
761
762    def _bare_name_matches(self, nodes):
763        """Special optimized matcher for bare_name."""
764        count = 0
765        r = {}
766        done = False
767        max = len(nodes)
768        while not done and count < max:
769            done = True
770            for leaf in self.content:
771                if leaf[0].match(nodes[count], r):
772                    count += 1
773                    done = False
774                    break
775        r[self.name] = nodes[:count]
776        return count, r
777
778    def _recursive_matches(self, nodes, count):
779        """Helper to recursively yield the matches."""
780        assert self.content is not None
781        if count >= self.min:
782            yield 0, {}
783        if count < self.max:
784            for alt in self.content:
785                for c0, r0 in generate_matches(alt, nodes):
786                    for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
787                        r = {}
788                        r.update(r0)
789                        r.update(r1)
790                        yield c0 + c1, r
791
792
793class NegatedPattern(BasePattern):
794
795    def __init__(self, content=None):
796        """
797        Initializer.
798
799        The argument is either a pattern or None.  If it is None, this
800        only matches an empty sequence (effectively '$' in regex
801        lingo).  If it is not None, this matches whenever the argument
802        pattern doesn't have any matches.
803        """
804        if content is not None:
805            assert isinstance(content, BasePattern), repr(content)
806        self.content = content
807
808    def match(self, node):
809        # We never match a node in its entirety
810        return False
811
812    def match_seq(self, nodes):
813        # We only match an empty sequence of nodes in its entirety
814        return len(nodes) == 0
815
816    def generate_matches(self, nodes):
817        if self.content is None:
818            # Return a match if there is an empty sequence
819            if len(nodes) == 0:
820                yield 0, {}
821        else:
822            # Return a match if the argument pattern has no matches
823            for c, r in self.content.generate_matches(nodes):
824                return
825            yield 0, {}
826
827
828def generate_matches(patterns, nodes):
829    """
830    Generator yielding matches for a sequence of patterns and nodes.
831
832    Args:
833        patterns: a sequence of patterns
834        nodes: a sequence of nodes
835
836    Yields:
837        (count, results) tuples where:
838        count: the entire sequence of patterns matches nodes[:count];
839        results: dict containing named submatches.
840        """
841    if not patterns:
842        yield 0, {}
843    else:
844        p, rest = patterns[0], patterns[1:]
845        for c0, r0 in p.generate_matches(nodes):
846            if not rest:
847                yield c0, r0
848            else:
849                for c1, r1 in generate_matches(rest, nodes[c0:]):
850                    r = {}
851                    r.update(r0)
852                    r.update(r1)
853                    yield c0 + c1, r
854