1"Utility functions used by the btm_matcher module"
2
3from . import pytree
4from .pgen2 import grammar, token
5from .pygram import pattern_symbols, python_symbols
6
7syms = pattern_symbols
8pysyms = python_symbols
9tokens = grammar.opmap
10token_labels = token
11
12TYPE_ANY = -1
13TYPE_ALTERNATIVES = -2
14TYPE_GROUP = -3
15
16class MinNode(object):
17    """This class serves as an intermediate representation of the
18    pattern tree during the conversion to sets of leaf-to-root
19    subpatterns"""
20
21    def __init__(self, type=None, name=None):
22        self.type = type
23        self.name = name
24        self.children = []
25        self.leaf = False
26        self.parent = None
27        self.alternatives = []
28        self.group = []
29
30    def __repr__(self):
31        return str(self.type) + ' ' + str(self.name)
32
33    def leaf_to_root(self):
34        """Internal method. Returns a characteristic path of the
35        pattern tree. This method must be run for all leaves until the
36        linear subpatterns are merged into a single"""
37        node = self
38        subp = []
39        while node:
40            if node.type == TYPE_ALTERNATIVES:
41                node.alternatives.append(subp)
42                if len(node.alternatives) == len(node.children):
43                    #last alternative
44                    subp = [tuple(node.alternatives)]
45                    node.alternatives = []
46                    node = node.parent
47                    continue
48                else:
49                    node = node.parent
50                    subp = None
51                    break
52
53            if node.type == TYPE_GROUP:
54                node.group.append(subp)
55                #probably should check the number of leaves
56                if len(node.group) == len(node.children):
57                    subp = get_characteristic_subpattern(node.group)
58                    node.group = []
59                    node = node.parent
60                    continue
61                else:
62                    node = node.parent
63                    subp = None
64                    break
65
66            if node.type == token_labels.NAME and node.name:
67                #in case of type=name, use the name instead
68                subp.append(node.name)
69            else:
70                subp.append(node.type)
71
72            node = node.parent
73        return subp
74
75    def get_linear_subpattern(self):
76        """Drives the leaf_to_root method. The reason that
77        leaf_to_root must be run multiple times is because we need to
78        reject 'group' matches; for example the alternative form
79        (a | b c) creates a group [b c] that needs to be matched. Since
80        matching multiple linear patterns overcomes the automaton's
81        capabilities, leaf_to_root merges each group into a single
82        choice based on 'characteristic'ity,
83
84        i.e. (a|b c) -> (a|b) if b more characteristic than c
85
86        Returns: The most 'characteristic'(as defined by
87          get_characteristic_subpattern) path for the compiled pattern
88          tree.
89        """
90
91        for l in self.leaves():
92            subp = l.leaf_to_root()
93            if subp:
94                return subp
95
96    def leaves(self):
97        "Generator that returns the leaves of the tree"
98        for child in self.children:
99            yield from child.leaves()
100        if not self.children:
101            yield self
102
103def reduce_tree(node, parent=None):
104    """
105    Internal function. Reduces a compiled pattern tree to an
106    intermediate representation suitable for feeding the
107    automaton. This also trims off any optional pattern elements(like
108    [a], a*).
109    """
110
111    new_node = None
112    #switch on the node type
113    if node.type == syms.Matcher:
114        #skip
115        node = node.children[0]
116
117    if node.type == syms.Alternatives  :
118        #2 cases
119        if len(node.children) <= 2:
120            #just a single 'Alternative', skip this node
121            new_node = reduce_tree(node.children[0], parent)
122        else:
123            #real alternatives
124            new_node = MinNode(type=TYPE_ALTERNATIVES)
125            #skip odd children('|' tokens)
126            for child in node.children:
127                if node.children.index(child)%2:
128                    continue
129                reduced = reduce_tree(child, new_node)
130                if reduced is not None:
131                    new_node.children.append(reduced)
132    elif node.type == syms.Alternative:
133        if len(node.children) > 1:
134
135            new_node = MinNode(type=TYPE_GROUP)
136            for child in node.children:
137                reduced = reduce_tree(child, new_node)
138                if reduced:
139                    new_node.children.append(reduced)
140            if not new_node.children:
141                # delete the group if all of the children were reduced to None
142                new_node = None
143
144        else:
145            new_node = reduce_tree(node.children[0], parent)
146
147    elif node.type == syms.Unit:
148        if (isinstance(node.children[0], pytree.Leaf) and
149            node.children[0].value == '('):
150            #skip parentheses
151            return reduce_tree(node.children[1], parent)
152        if ((isinstance(node.children[0], pytree.Leaf) and
153               node.children[0].value == '[')
154               or
155               (len(node.children)>1 and
156               hasattr(node.children[1], "value") and
157               node.children[1].value == '[')):
158            #skip whole unit if its optional
159            return None
160
161        leaf = True
162        details_node = None
163        alternatives_node = None
164        has_repeater = False
165        repeater_node = None
166        has_variable_name = False
167
168        for child in node.children:
169            if child.type == syms.Details:
170                leaf = False
171                details_node = child
172            elif child.type == syms.Repeater:
173                has_repeater = True
174                repeater_node = child
175            elif child.type == syms.Alternatives:
176                alternatives_node = child
177            if hasattr(child, 'value') and child.value == '=': # variable name
178                has_variable_name = True
179
180        #skip variable name
181        if has_variable_name:
182            #skip variable name, '='
183            name_leaf = node.children[2]
184            if hasattr(name_leaf, 'value') and name_leaf.value == '(':
185                # skip parenthesis
186                name_leaf = node.children[3]
187        else:
188            name_leaf = node.children[0]
189
190        #set node type
191        if name_leaf.type == token_labels.NAME:
192            #(python) non-name or wildcard
193            if name_leaf.value == 'any':
194                new_node = MinNode(type=TYPE_ANY)
195            else:
196                if hasattr(token_labels, name_leaf.value):
197                    new_node = MinNode(type=getattr(token_labels, name_leaf.value))
198                else:
199                    new_node = MinNode(type=getattr(pysyms, name_leaf.value))
200
201        elif name_leaf.type == token_labels.STRING:
202            #(python) name or character; remove the apostrophes from
203            #the string value
204            name = name_leaf.value.strip("'")
205            if name in tokens:
206                new_node = MinNode(type=tokens[name])
207            else:
208                new_node = MinNode(type=token_labels.NAME, name=name)
209        elif name_leaf.type == syms.Alternatives:
210            new_node = reduce_tree(alternatives_node, parent)
211
212        #handle repeaters
213        if has_repeater:
214            if repeater_node.children[0].value == '*':
215                #reduce to None
216                new_node = None
217            elif repeater_node.children[0].value == '+':
218                #reduce to a single occurrence i.e. do nothing
219                pass
220            else:
221                #TODO: handle {min, max} repeaters
222                raise NotImplementedError
223                pass
224
225        #add children
226        if details_node and new_node is not None:
227            for child in details_node.children[1:-1]:
228                #skip '<', '>' markers
229                reduced = reduce_tree(child, new_node)
230                if reduced is not None:
231                    new_node.children.append(reduced)
232    if new_node:
233        new_node.parent = parent
234    return new_node
235
236
237def get_characteristic_subpattern(subpatterns):
238    """Picks the most characteristic from a list of linear patterns
239    Current order used is:
240    names > common_names > common_chars
241    """
242    if not isinstance(subpatterns, list):
243        return subpatterns
244    if len(subpatterns)==1:
245        return subpatterns[0]
246
247    # first pick out the ones containing variable names
248    subpatterns_with_names = []
249    subpatterns_with_common_names = []
250    common_names = ['in', 'for', 'if' , 'not', 'None']
251    subpatterns_with_common_chars = []
252    common_chars = "[]().,:"
253    for subpattern in subpatterns:
254        if any(rec_test(subpattern, lambda x: type(x) is str)):
255            if any(rec_test(subpattern,
256                            lambda x: isinstance(x, str) and x in common_chars)):
257                subpatterns_with_common_chars.append(subpattern)
258            elif any(rec_test(subpattern,
259                              lambda x: isinstance(x, str) and x in common_names)):
260                subpatterns_with_common_names.append(subpattern)
261
262            else:
263                subpatterns_with_names.append(subpattern)
264
265    if subpatterns_with_names:
266        subpatterns = subpatterns_with_names
267    elif subpatterns_with_common_names:
268        subpatterns = subpatterns_with_common_names
269    elif subpatterns_with_common_chars:
270        subpatterns = subpatterns_with_common_chars
271    # of the remaining subpatterns pick out the longest one
272    return max(subpatterns, key=len)
273
274def rec_test(sequence, test_func):
275    """Tests test_func on all items of sequence and items of included
276    sub-iterables"""
277    for x in sequence:
278        if isinstance(x, (list, tuple)):
279            yield from rec_test(x, test_func)
280        else:
281            yield test_func(x)
282