1# Copyright 2006 Google, Inc. All Rights Reserved.
2# Licensed to PSF under a Contributor Agreement.
3
4"""
5Python parse tree definitions.
6
7This is a very concrete parse tree; we need to keep every token and
8even the comments and whitespace between tokens.
9
10There's also a pattern matching implementation here.
11"""
12
13__author__ = "Guido van Rossum <guido@python.org>"
14
15import sys
16import warnings
17from StringIO import StringIO
18
19HUGE = 0x7FFFFFFF  # maximum repeat count, default max
20
21_type_reprs = {}
22def type_repr(type_num):
23    global _type_reprs
24    if not _type_reprs:
25        from .pygram import python_symbols
26        # printing tokens is possible but not as useful
27        # from .pgen2 import token // token.__dict__.items():
28        for name, val in python_symbols.__dict__.items():
29            if type(val) == int: _type_reprs[val] = name
30    return _type_reprs.setdefault(type_num, type_num)
31
32class Base(object):
33
34    """
35    Abstract base class for Node and Leaf.
36
37    This provides some default functionality and boilerplate using the
38    template pattern.
39
40    A node may be a subnode of at most one parent.
41    """
42
43    # Default values for instance variables
44    type = None    # int: token number (< 256) or symbol number (>= 256)
45    parent = None  # Parent node pointer, or None
46    children = ()  # Tuple of subnodes
47    was_changed = False
48    was_checked = False
49
50    def __new__(cls, *args, **kwds):
51        """Constructor that prevents Base from being instantiated."""
52        assert cls is not Base, "Cannot instantiate Base"
53        return object.__new__(cls)
54
55    def __eq__(self, other):
56        """
57        Compare two nodes for equality.
58
59        This calls the method _eq().
60        """
61        if self.__class__ is not other.__class__:
62            return NotImplemented
63        return self._eq(other)
64
65    __hash__ = None # For Py3 compatibility.
66
67    def __ne__(self, other):
68        """
69        Compare two nodes for inequality.
70
71        This calls the method _eq().
72        """
73        if self.__class__ is not other.__class__:
74            return NotImplemented
75        return not self._eq(other)
76
77    def _eq(self, other):
78        """
79        Compare two nodes for equality.
80
81        This is called by __eq__ and __ne__.  It is only called if the two nodes
82        have the same type.  This must be implemented by the concrete subclass.
83        Nodes should be considered equal if they have the same structure,
84        ignoring the prefix string and other context information.
85        """
86        raise NotImplementedError
87
88    def clone(self):
89        """
90        Return a cloned (deep) copy of self.
91
92        This must be implemented by the concrete subclass.
93        """
94        raise NotImplementedError
95
96    def post_order(self):
97        """
98        Return a post-order iterator for the tree.
99
100        This must be implemented by the concrete subclass.
101        """
102        raise NotImplementedError
103
104    def pre_order(self):
105        """
106        Return a pre-order iterator for the tree.
107
108        This must be implemented by the concrete subclass.
109        """
110        raise NotImplementedError
111
112    def set_prefix(self, prefix):
113        """
114        Set the prefix for the node (see Leaf class).
115
116        DEPRECATED; use the prefix property directly.
117        """
118        warnings.warn("set_prefix() is deprecated; use the prefix property",
119                      DeprecationWarning, stacklevel=2)
120        self.prefix = prefix
121
122    def get_prefix(self):
123        """
124        Return the prefix for the node (see Leaf class).
125
126        DEPRECATED; use the prefix property directly.
127        """
128        warnings.warn("get_prefix() is deprecated; use the prefix property",
129                      DeprecationWarning, stacklevel=2)
130        return self.prefix
131
132    def replace(self, new):
133        """Replace this node with a new one in the parent."""
134        assert self.parent is not None, str(self)
135        assert new is not None
136        if not isinstance(new, list):
137            new = [new]
138        l_children = []
139        found = False
140        for ch in self.parent.children:
141            if ch is self:
142                assert not found, (self.parent.children, self, new)
143                if new is not None:
144                    l_children.extend(new)
145                found = True
146            else:
147                l_children.append(ch)
148        assert found, (self.children, self, new)
149        self.parent.changed()
150        self.parent.children = l_children
151        for x in new:
152            x.parent = self.parent
153        self.parent = None
154
155    def get_lineno(self):
156        """Return the line number which generated the invocant node."""
157        node = self
158        while not isinstance(node, Leaf):
159            if not node.children:
160                return
161            node = node.children[0]
162        return node.lineno
163
164    def changed(self):
165        if self.parent:
166            self.parent.changed()
167        self.was_changed = True
168
169    def remove(self):
170        """
171        Remove the node from the tree. Returns the position of the node in its
172        parent's children before it was removed.
173        """
174        if self.parent:
175            for i, node in enumerate(self.parent.children):
176                if node is self:
177                    self.parent.changed()
178                    del self.parent.children[i]
179                    self.parent = None
180                    return i
181
182    @property
183    def next_sibling(self):
184        """
185        The node immediately following the invocant in their parent's children
186        list. If the invocant does not have a next sibling, it is None
187        """
188        if self.parent is None:
189            return None
190
191        # Can't use index(); we need to test by identity
192        for i, child in enumerate(self.parent.children):
193            if child is self:
194                try:
195                    return self.parent.children[i+1]
196                except IndexError:
197                    return None
198
199    @property
200    def prev_sibling(self):
201        """
202        The node immediately preceding the invocant in their parent's children
203        list. If the invocant does not have a previous sibling, it is None.
204        """
205        if self.parent is None:
206            return None
207
208        # Can't use index(); we need to test by identity
209        for i, child in enumerate(self.parent.children):
210            if child is self:
211                if i == 0:
212                    return None
213                return self.parent.children[i-1]
214
215    def leaves(self):
216        for child in self.children:
217            for x in child.leaves():
218                yield x
219
220    def depth(self):
221        if self.parent is None:
222            return 0
223        return 1 + self.parent.depth()
224
225    def get_suffix(self):
226        """
227        Return the string immediately following the invocant node. This is
228        effectively equivalent to node.next_sibling.prefix
229        """
230        next_sib = self.next_sibling
231        if next_sib is None:
232            return u""
233        return next_sib.prefix
234
235    if sys.version_info < (3, 0):
236        def __str__(self):
237            return unicode(self).encode("ascii")
238
239class Node(Base):
240
241    """Concrete implementation for interior nodes."""
242
243    def __init__(self,type, children,
244                 context=None,
245                 prefix=None,
246                 fixers_applied=None):
247        """
248        Initializer.
249
250        Takes a type constant (a symbol number >= 256), a sequence of
251        child nodes, and an optional context keyword argument.
252
253        As a side effect, the parent pointers of the children are updated.
254        """
255        assert type >= 256, type
256        self.type = type
257        self.children = list(children)
258        for ch in self.children:
259            assert ch.parent is None, repr(ch)
260            ch.parent = self
261        if prefix is not None:
262            self.prefix = prefix
263        if fixers_applied:
264            self.fixers_applied = fixers_applied[:]
265        else:
266            self.fixers_applied = None
267
268    def __repr__(self):
269        """Return a canonical string representation."""
270        return "%s(%s, %r)" % (self.__class__.__name__,
271                               type_repr(self.type),
272                               self.children)
273
274    def __unicode__(self):
275        """
276        Return a pretty string representation.
277
278        This reproduces the input source exactly.
279        """
280        return u"".join(map(unicode, self.children))
281
282    if sys.version_info > (3, 0):
283        __str__ = __unicode__
284
285    def _eq(self, other):
286        """Compare two nodes for equality."""
287        return (self.type, self.children) == (other.type, other.children)
288
289    def clone(self):
290        """Return a cloned (deep) copy of self."""
291        return Node(self.type, [ch.clone() for ch in self.children],
292                    fixers_applied=self.fixers_applied)
293
294    def post_order(self):
295        """Return a post-order iterator for the tree."""
296        for child in self.children:
297            for node in child.post_order():
298                yield node
299        yield self
300
301    def pre_order(self):
302        """Return a pre-order iterator for the tree."""
303        yield self
304        for child in self.children:
305            for node in child.pre_order():
306                yield node
307
308    def _prefix_getter(self):
309        """
310        The whitespace and comments preceding this node in the input.
311        """
312        if not self.children:
313            return ""
314        return self.children[0].prefix
315
316    def _prefix_setter(self, prefix):
317        if self.children:
318            self.children[0].prefix = prefix
319
320    prefix = property(_prefix_getter, _prefix_setter)
321
322    def set_child(self, i, child):
323        """
324        Equivalent to 'node.children[i] = child'. This method also sets the
325        child's parent attribute appropriately.
326        """
327        child.parent = self
328        self.children[i].parent = None
329        self.children[i] = child
330        self.changed()
331
332    def insert_child(self, i, child):
333        """
334        Equivalent to 'node.children.insert(i, child)'. This method also sets
335        the child's parent attribute appropriately.
336        """
337        child.parent = self
338        self.children.insert(i, child)
339        self.changed()
340
341    def append_child(self, child):
342        """
343        Equivalent to 'node.children.append(child)'. This method also sets the
344        child's parent attribute appropriately.
345        """
346        child.parent = self
347        self.children.append(child)
348        self.changed()
349
350
351class Leaf(Base):
352
353    """Concrete implementation for leaf nodes."""
354
355    # Default values for instance variables
356    _prefix = ""  # Whitespace and comments preceding this token in the input
357    lineno = 0    # Line where this token starts in the input
358    column = 0    # Column where this token tarts in the input
359
360    def __init__(self, type, value,
361                 context=None,
362                 prefix=None,
363                 fixers_applied=[]):
364        """
365        Initializer.
366
367        Takes a type constant (a token number < 256), a string value, and an
368        optional context keyword argument.
369        """
370        assert 0 <= type < 256, type
371        if context is not None:
372            self._prefix, (self.lineno, self.column) = context
373        self.type = type
374        self.value = value
375        if prefix is not None:
376            self._prefix = prefix
377        self.fixers_applied = fixers_applied[:]
378
379    def __repr__(self):
380        """Return a canonical string representation."""
381        return "%s(%r, %r)" % (self.__class__.__name__,
382                               self.type,
383                               self.value)
384
385    def __unicode__(self):
386        """
387        Return a pretty string representation.
388
389        This reproduces the input source exactly.
390        """
391        return self.prefix + unicode(self.value)
392
393    if sys.version_info > (3, 0):
394        __str__ = __unicode__
395
396    def _eq(self, other):
397        """Compare two nodes for equality."""
398        return (self.type, self.value) == (other.type, other.value)
399
400    def clone(self):
401        """Return a cloned (deep) copy of self."""
402        return Leaf(self.type, self.value,
403                    (self.prefix, (self.lineno, self.column)),
404                    fixers_applied=self.fixers_applied)
405
406    def leaves(self):
407        yield self
408
409    def post_order(self):
410        """Return a post-order iterator for the tree."""
411        yield self
412
413    def pre_order(self):
414        """Return a pre-order iterator for the tree."""
415        yield self
416
417    def _prefix_getter(self):
418        """
419        The whitespace and comments preceding this token in the input.
420        """
421        return self._prefix
422
423    def _prefix_setter(self, prefix):
424        self.changed()
425        self._prefix = prefix
426
427    prefix = property(_prefix_getter, _prefix_setter)
428
429def convert(gr, raw_node):
430    """
431    Convert raw node information to a Node or Leaf instance.
432
433    This is passed to the parser driver which calls it whenever a reduction of a
434    grammar rule produces a new complete node, so that the tree is build
435    strictly bottom-up.
436    """
437    type, value, context, children = raw_node
438    if children or type in gr.number2symbol:
439        # If there's exactly one child, return that child instead of
440        # creating a new node.
441        if len(children) == 1:
442            return children[0]
443        return Node(type, children, context=context)
444    else:
445        return Leaf(type, value, context=context)
446
447
448class BasePattern(object):
449
450    """
451    A pattern is a tree matching pattern.
452
453    It looks for a specific node type (token or symbol), and
454    optionally for a specific content.
455
456    This is an abstract base class.  There are three concrete
457    subclasses:
458
459    - LeafPattern matches a single leaf node;
460    - NodePattern matches a single node (usually non-leaf);
461    - WildcardPattern matches a sequence of nodes of variable length.
462    """
463
464    # Defaults for instance variables
465    type = None     # Node type (token if < 256, symbol if >= 256)
466    content = None  # Optional content matching pattern
467    name = None     # Optional name used to store match in results dict
468
469    def __new__(cls, *args, **kwds):
470        """Constructor that prevents BasePattern from being instantiated."""
471        assert cls is not BasePattern, "Cannot instantiate BasePattern"
472        return object.__new__(cls)
473
474    def __repr__(self):
475        args = [type_repr(self.type), self.content, self.name]
476        while args and args[-1] is None:
477            del args[-1]
478        return "%s(%s)" % (self.__class__.__name__, ", ".join(map(repr, args)))
479
480    def optimize(self):
481        """
482        A subclass can define this as a hook for optimizations.
483
484        Returns either self or another node with the same effect.
485        """
486        return self
487
488    def match(self, node, results=None):
489        """
490        Does this pattern exactly match a node?
491
492        Returns True if it matches, False if not.
493
494        If results is not None, it must be a dict which will be
495        updated with the nodes matching named subpatterns.
496
497        Default implementation for non-wildcard patterns.
498        """
499        if self.type is not None and node.type != self.type:
500            return False
501        if self.content is not None:
502            r = None
503            if results is not None:
504                r = {}
505            if not self._submatch(node, r):
506                return False
507            if r:
508                results.update(r)
509        if results is not None and self.name:
510            results[self.name] = node
511        return True
512
513    def match_seq(self, nodes, results=None):
514        """
515        Does this pattern exactly match a sequence of nodes?
516
517        Default implementation for non-wildcard patterns.
518        """
519        if len(nodes) != 1:
520            return False
521        return self.match(nodes[0], results)
522
523    def generate_matches(self, nodes):
524        """
525        Generator yielding all matches for this pattern.
526
527        Default implementation for non-wildcard patterns.
528        """
529        r = {}
530        if nodes and self.match(nodes[0], r):
531            yield 1, r
532
533
534class LeafPattern(BasePattern):
535
536    def __init__(self, type=None, content=None, name=None):
537        """
538        Initializer.  Takes optional type, content, and name.
539
540        The type, if given must be a token type (< 256).  If not given,
541        this matches any *leaf* node; the content may still be required.
542
543        The content, if given, must be a string.
544
545        If a name is given, the matching node is stored in the results
546        dict under that key.
547        """
548        if type is not None:
549            assert 0 <= type < 256, type
550        if content is not None:
551            assert isinstance(content, basestring), repr(content)
552        self.type = type
553        self.content = content
554        self.name = name
555
556    def match(self, node, results=None):
557        """Override match() to insist on a leaf node."""
558        if not isinstance(node, Leaf):
559            return False
560        return BasePattern.match(self, node, results)
561
562    def _submatch(self, node, results=None):
563        """
564        Match the pattern's content to the node's children.
565
566        This assumes the node type matches and self.content is not None.
567
568        Returns True if it matches, False if not.
569
570        If results is not None, it must be a dict which will be
571        updated with the nodes matching named subpatterns.
572
573        When returning False, the results dict may still be updated.
574        """
575        return self.content == node.value
576
577
578class NodePattern(BasePattern):
579
580    wildcards = False
581
582    def __init__(self, type=None, content=None, name=None):
583        """
584        Initializer.  Takes optional type, content, and name.
585
586        The type, if given, must be a symbol type (>= 256).  If the
587        type is None this matches *any* single node (leaf or not),
588        except if content is not None, in which it only matches
589        non-leaf nodes that also match the content pattern.
590
591        The content, if not None, must be a sequence of Patterns that
592        must match the node's children exactly.  If the content is
593        given, the type must not be None.
594
595        If a name is given, the matching node is stored in the results
596        dict under that key.
597        """
598        if type is not None:
599            assert type >= 256, type
600        if content is not None:
601            assert not isinstance(content, basestring), repr(content)
602            content = list(content)
603            for i, item in enumerate(content):
604                assert isinstance(item, BasePattern), (i, item)
605                if isinstance(item, WildcardPattern):
606                    self.wildcards = True
607        self.type = type
608        self.content = content
609        self.name = name
610
611    def _submatch(self, node, results=None):
612        """
613        Match the pattern's content to the node's children.
614
615        This assumes the node type matches and self.content is not None.
616
617        Returns True if it matches, False if not.
618
619        If results is not None, it must be a dict which will be
620        updated with the nodes matching named subpatterns.
621
622        When returning False, the results dict may still be updated.
623        """
624        if self.wildcards:
625            for c, r in generate_matches(self.content, node.children):
626                if c == len(node.children):
627                    if results is not None:
628                        results.update(r)
629                    return True
630            return False
631        if len(self.content) != len(node.children):
632            return False
633        for subpattern, child in zip(self.content, node.children):
634            if not subpattern.match(child, results):
635                return False
636        return True
637
638
639class WildcardPattern(BasePattern):
640
641    """
642    A wildcard pattern can match zero or more nodes.
643
644    This has all the flexibility needed to implement patterns like:
645
646    .*      .+      .?      .{m,n}
647    (a b c | d e | f)
648    (...)*  (...)+  (...)?  (...){m,n}
649
650    except it always uses non-greedy matching.
651    """
652
653    def __init__(self, content=None, min=0, max=HUGE, name=None):
654        """
655        Initializer.
656
657        Args:
658            content: optional sequence of subsequences of patterns;
659                     if absent, matches one node;
660                     if present, each subsequence is an alternative [*]
661            min: optional minimum number of times to match, default 0
662            max: optional maximum number of times to match, default HUGE
663            name: optional name assigned to this match
664
665        [*] Thus, if content is [[a, b, c], [d, e], [f, g, h]] this is
666            equivalent to (a b c | d e | f g h); if content is None,
667            this is equivalent to '.' in regular expression terms.
668            The min and max parameters work as follows:
669                min=0, max=maxint: .*
670                min=1, max=maxint: .+
671                min=0, max=1: .?
672                min=1, max=1: .
673            If content is not None, replace the dot with the parenthesized
674            list of alternatives, e.g. (a b c | d e | f g h)*
675        """
676        assert 0 <= min <= max <= HUGE, (min, max)
677        if content is not None:
678            content = tuple(map(tuple, content))  # Protect against alterations
679            # Check sanity of alternatives
680            assert len(content), repr(content)  # Can't have zero alternatives
681            for alt in content:
682                assert len(alt), repr(alt) # Can have empty alternatives
683        self.content = content
684        self.min = min
685        self.max = max
686        self.name = name
687
688    def optimize(self):
689        """Optimize certain stacked wildcard patterns."""
690        subpattern = None
691        if (self.content is not None and
692            len(self.content) == 1 and len(self.content[0]) == 1):
693            subpattern = self.content[0][0]
694        if self.min == 1 and self.max == 1:
695            if self.content is None:
696                return NodePattern(name=self.name)
697            if subpattern is not None and  self.name == subpattern.name:
698                return subpattern.optimize()
699        if (self.min <= 1 and isinstance(subpattern, WildcardPattern) and
700            subpattern.min <= 1 and self.name == subpattern.name):
701            return WildcardPattern(subpattern.content,
702                                   self.min*subpattern.min,
703                                   self.max*subpattern.max,
704                                   subpattern.name)
705        return self
706
707    def match(self, node, results=None):
708        """Does this pattern exactly match a node?"""
709        return self.match_seq([node], results)
710
711    def match_seq(self, nodes, results=None):
712        """Does this pattern exactly match a sequence of nodes?"""
713        for c, r in self.generate_matches(nodes):
714            if c == len(nodes):
715                if results is not None:
716                    results.update(r)
717                    if self.name:
718                        results[self.name] = list(nodes)
719                return True
720        return False
721
722    def generate_matches(self, nodes):
723        """
724        Generator yielding matches for a sequence of nodes.
725
726        Args:
727            nodes: sequence of nodes
728
729        Yields:
730            (count, results) tuples where:
731            count: the match comprises nodes[:count];
732            results: dict containing named submatches.
733        """
734        if self.content is None:
735            # Shortcut for special case (see __init__.__doc__)
736            for count in xrange(self.min, 1 + min(len(nodes), self.max)):
737                r = {}
738                if self.name:
739                    r[self.name] = nodes[:count]
740                yield count, r
741        elif self.name == "bare_name":
742            yield self._bare_name_matches(nodes)
743        else:
744            # The reason for this is that hitting the recursion limit usually
745            # results in some ugly messages about how RuntimeErrors are being
746            # ignored. We don't do this on non-CPython implementation because
747            # they don't have this problem.
748            if hasattr(sys, "getrefcount"):
749                save_stderr = sys.stderr
750                sys.stderr = StringIO()
751            try:
752                for count, r in self._recursive_matches(nodes, 0):
753                    if self.name:
754                        r[self.name] = nodes[:count]
755                    yield count, r
756            except RuntimeError:
757                # We fall back to the iterative pattern matching scheme if the recursive
758                # scheme hits the recursion limit.
759                for count, r in self._iterative_matches(nodes):
760                    if self.name:
761                        r[self.name] = nodes[:count]
762                    yield count, r
763            finally:
764                if hasattr(sys, "getrefcount"):
765                    sys.stderr = save_stderr
766
767    def _iterative_matches(self, nodes):
768        """Helper to iteratively yield the matches."""
769        nodelen = len(nodes)
770        if 0 >= self.min:
771            yield 0, {}
772
773        results = []
774        # generate matches that use just one alt from self.content
775        for alt in self.content:
776            for c, r in generate_matches(alt, nodes):
777                yield c, r
778                results.append((c, r))
779
780        # for each match, iterate down the nodes
781        while results:
782            new_results = []
783            for c0, r0 in results:
784                # stop if the entire set of nodes has been matched
785                if c0 < nodelen and c0 <= self.max:
786                    for alt in self.content:
787                        for c1, r1 in generate_matches(alt, nodes[c0:]):
788                            if c1 > 0:
789                                r = {}
790                                r.update(r0)
791                                r.update(r1)
792                                yield c0 + c1, r
793                                new_results.append((c0 + c1, r))
794            results = new_results
795
796    def _bare_name_matches(self, nodes):
797        """Special optimized matcher for bare_name."""
798        count = 0
799        r = {}
800        done = False
801        max = len(nodes)
802        while not done and count < max:
803            done = True
804            for leaf in self.content:
805                if leaf[0].match(nodes[count], r):
806                    count += 1
807                    done = False
808                    break
809        r[self.name] = nodes[:count]
810        return count, r
811
812    def _recursive_matches(self, nodes, count):
813        """Helper to recursively yield the matches."""
814        assert self.content is not None
815        if count >= self.min:
816            yield 0, {}
817        if count < self.max:
818            for alt in self.content:
819                for c0, r0 in generate_matches(alt, nodes):
820                    for c1, r1 in self._recursive_matches(nodes[c0:], count+1):
821                        r = {}
822                        r.update(r0)
823                        r.update(r1)
824                        yield c0 + c1, r
825
826
827class NegatedPattern(BasePattern):
828
829    def __init__(self, content=None):
830        """
831        Initializer.
832
833        The argument is either a pattern or None.  If it is None, this
834        only matches an empty sequence (effectively '$' in regex
835        lingo).  If it is not None, this matches whenever the argument
836        pattern doesn't have any matches.
837        """
838        if content is not None:
839            assert isinstance(content, BasePattern), repr(content)
840        self.content = content
841
842    def match(self, node):
843        # We never match a node in its entirety
844        return False
845
846    def match_seq(self, nodes):
847        # We only match an empty sequence of nodes in its entirety
848        return len(nodes) == 0
849
850    def generate_matches(self, nodes):
851        if self.content is None:
852            # Return a match if there is an empty sequence
853            if len(nodes) == 0:
854                yield 0, {}
855        else:
856            # Return a match if the argument pattern has no matches
857            for c, r in self.content.generate_matches(nodes):
858                return
859            yield 0, {}
860
861
862def generate_matches(patterns, nodes):
863    """
864    Generator yielding matches for a sequence of patterns and nodes.
865
866    Args:
867        patterns: a sequence of patterns
868        nodes: a sequence of nodes
869
870    Yields:
871        (count, results) tuples where:
872        count: the entire sequence of patterns matches nodes[:count];
873        results: dict containing named submatches.
874        """
875    if not patterns:
876        yield 0, {}
877    else:
878        p, rest = patterns[0], patterns[1:]
879        for c0, r0 in p.generate_matches(nodes):
880            if not rest:
881                yield c0, r0
882            else:
883                for c1, r1 in generate_matches(rest, nodes[c0:]):
884                    r = {}
885                    r.update(r0)
886                    r.update(r1)
887                    yield c0 + c1, r
888