1# Copyright 2015 Google Inc. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""pytree-related utilities.
15
16This module collects various utilities related to the parse trees produced by
17the lib2to3 library.
18
19  NodeName(): produces a string name for pytree nodes.
20  ParseCodeToTree(): convenience wrapper around lib2to3 interfaces to parse
21                     a given string with code to a pytree.
22  InsertNodeBefore(): insert a node before another in a pytree.
23  InsertNodeAfter(): insert a node after another in a pytree.
24  {Get,Set}NodeAnnotation(): manage custom annotations on pytree nodes.
25"""
26
27import ast
28
29from lib2to3 import pygram
30from lib2to3 import pytree
31from lib2to3.pgen2 import driver
32from lib2to3.pgen2 import parse
33from lib2to3.pgen2 import token
34
35# TODO(eliben): We may want to get rid of this filtering at some point once we
36# have a better understanding of what information we need from the tree. Then,
37# these tokens may be filtered out from the tree before the tree gets to the
38# unwrapper.
39NONSEMANTIC_TOKENS = frozenset(['DEDENT', 'INDENT', 'NEWLINE', 'ENDMARKER'])
40
41OPENING_BRACKETS = frozenset({'(', '[', '{'})
42CLOSING_BRACKETS = frozenset({')', ']', '}'})
43
44
45class Annotation(object):
46  """Annotation names associated with pytrees."""
47  CHILD_INDENT = 'child_indent'
48  NEWLINES = 'newlines'
49  MUST_SPLIT = 'must_split'
50  SPLIT_PENALTY = 'split_penalty'
51  SUBTYPE = 'subtype'
52
53
54def NodeName(node):
55  """Produce a string name for a given node.
56
57  For a Leaf this is the token name, and for a Node this is the type.
58
59  Arguments:
60    node: a tree node
61
62  Returns:
63    Name as a string.
64  """
65  # Nodes with values < 256 are tokens. Values >= 256 are grammar symbols.
66  if node.type < 256:
67    return token.tok_name[node.type]
68  else:
69    return pygram.python_grammar.number2symbol[node.type]
70
71
72def FirstLeafNode(node):
73  if isinstance(node, pytree.Leaf):
74    return node
75  return FirstLeafNode(node.children[0])
76
77
78def LastLeafNode(node):
79  if isinstance(node, pytree.Leaf):
80    return node
81  return LastLeafNode(node.children[-1])
82
83
84# lib2to3 thoughtfully provides pygram.python_grammar_no_print_statement for
85# parsing Python 3 code that wouldn't parse otherwise (when 'print' is used in a
86# context where a keyword is disallowed).
87# It forgets to do the same for 'exec' though. Luckily, Python is amenable to
88# monkey-patching.
89_GRAMMAR_FOR_PY3 = pygram.python_grammar_no_print_statement.copy()
90del _GRAMMAR_FOR_PY3.keywords['exec']
91
92_GRAMMAR_FOR_PY2 = pygram.python_grammar.copy()
93del _GRAMMAR_FOR_PY2.keywords['nonlocal']
94
95
96def ParseCodeToTree(code):
97  """Parse the given code to a lib2to3 pytree.
98
99  Arguments:
100    code: a string with the code to parse.
101
102  Raises:
103    SyntaxError if the code is invalid syntax.
104    parse.ParseError if some other parsing failure.
105
106  Returns:
107    The root node of the parsed tree.
108  """
109  # This function is tiny, but the incantation for invoking the parser correctly
110  # is sufficiently magical to be worth abstracting away.
111  try:
112    # Try to parse using a Python 3 grammar, which is more permissive (print and
113    # exec are not keywords).
114    parser_driver = driver.Driver(_GRAMMAR_FOR_PY3, convert=pytree.convert)
115    tree = parser_driver.parse_string(code, debug=False)
116  except parse.ParseError:
117    # Now try to parse using a Python 2 grammar; If this fails, then
118    # there's something else wrong with the code.
119    try:
120      parser_driver = driver.Driver(_GRAMMAR_FOR_PY2, convert=pytree.convert)
121      tree = parser_driver.parse_string(code, debug=False)
122    except parse.ParseError:
123      # Raise a syntax error if the code is invalid python syntax.
124      try:
125        ast.parse(code)
126      except SyntaxError as e:
127        raise e
128      else:
129        raise
130  return _WrapEndMarker(tree)
131
132
133def _WrapEndMarker(tree):
134  """Wrap a single ENDMARKER token in a "file_input" node.
135
136  Arguments:
137    tree: (pytree.Node) The root node of the parsed tree.
138
139  Returns:
140    The root node of the parsed tree. If the tree is a single ENDMARKER node,
141    then that node is wrapped in a "file_input" node. That will ensure we don't
142    skip comments attached to that node.
143  """
144  if isinstance(tree, pytree.Leaf) and tree.type == token.ENDMARKER:
145    return pytree.Node(pygram.python_symbols.file_input, [tree])
146  return tree
147
148
149def InsertNodesBefore(new_nodes, target):
150  """Insert new_nodes before the given target location in the tree.
151
152  Arguments:
153    new_nodes: a sequence of new nodes to insert (the nodes should not be in the
154      tree).
155    target: the target node before which the new node node will be inserted.
156
157  Raises:
158    RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
159  """
160  for node in new_nodes:
161    _InsertNodeAt(node, target, after=False)
162
163
164def InsertNodesAfter(new_nodes, target):
165  """Insert new_nodes after the given target location in the tree.
166
167  Arguments:
168    new_nodes: a sequence of new nodes to insert (the nodes should not be in the
169      tree).
170    target: the target node after which the new node node will be inserted.
171
172  Raises:
173    RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
174  """
175  for node in reversed(new_nodes):
176    _InsertNodeAt(node, target, after=True)
177
178
179def _InsertNodeAt(new_node, target, after=False):
180  """Underlying implementation for node insertion.
181
182  Arguments:
183    new_node: a new node to insert (this node should not be in the tree).
184    target: the target node.
185    after: if True, new_node is inserted after target. Otherwise, it's inserted
186      before target.
187
188  Returns:
189    nothing
190
191  Raises:
192    RuntimeError: if the tree is corrupted, or the insertion would corrupt it.
193  """
194
195  # Protect against attempts to insert nodes which already belong to some tree.
196  if new_node.parent is not None:
197    raise RuntimeError('inserting node which already has a parent',
198                       (new_node, new_node.parent))
199
200  # The code here is based on pytree.Base.next_sibling
201  parent_of_target = target.parent
202  if parent_of_target is None:
203    raise RuntimeError('expected target node to have a parent', (target,))
204
205  for i, child in enumerate(parent_of_target.children):
206    if child is target:
207      insertion_index = i + 1 if after else i
208      parent_of_target.insert_child(insertion_index, new_node)
209      return
210
211  raise RuntimeError('unable to find insertion point for target node',
212                     (target,))
213
214
215# The following constant and functions implement a simple custom annotation
216# mechanism for pytree nodes. We attach new attributes to nodes. Each attribute
217# is prefixed with _NODE_ANNOTATION_PREFIX. These annotations should only be
218# managed through GetNodeAnnotation and SetNodeAnnotation.
219_NODE_ANNOTATION_PREFIX = '_yapf_annotation_'
220
221
222def CopyYapfAnnotations(src, dst):
223  """Copy all YAPF annotations from the source node to the destination node.
224
225  Arguments:
226    src: the source node.
227    dst: the destination node.
228  """
229  for annotation in dir(src):
230    if annotation.startswith(_NODE_ANNOTATION_PREFIX):
231      setattr(dst, annotation, getattr(src, annotation, None))
232
233
234def GetNodeAnnotation(node, annotation, default=None):
235  """Get annotation value from a node.
236
237  Arguments:
238    node: the node.
239    annotation: annotation name - a string.
240    default: the default value to return if there's no annotation.
241
242  Returns:
243    Value of the annotation in the given node. If the node doesn't have this
244    particular annotation name yet, returns default.
245  """
246  return getattr(node, _NODE_ANNOTATION_PREFIX + annotation, default)
247
248
249def SetNodeAnnotation(node, annotation, value):
250  """Set annotation value on a node.
251
252  Arguments:
253    node: the node.
254    annotation: annotation name - a string.
255    value: annotation value to set.
256  """
257  setattr(node, _NODE_ANNOTATION_PREFIX + annotation, value)
258
259
260def AppendNodeAnnotation(node, annotation, value):
261  """Appends an annotation value to a list of annotations on the node.
262
263  Arguments:
264    node: the node.
265    annotation: annotation name - a string.
266    value: annotation value to set.
267  """
268  attr = GetNodeAnnotation(node, annotation, set())
269  attr.add(value)
270  SetNodeAnnotation(node, annotation, attr)
271
272
273def RemoveSubtypeAnnotation(node, value):
274  """Removes an annotation value from the subtype annotations on the node.
275
276  Arguments:
277    node: the node.
278    value: annotation value to remove.
279  """
280  attr = GetNodeAnnotation(node, Annotation.SUBTYPE)
281  if attr and value in attr:
282    attr.remove(value)
283    SetNodeAnnotation(node, Annotation.SUBTYPE, attr)
284
285
286def GetOpeningBracket(node):
287  """Get opening bracket value from a node.
288
289  Arguments:
290    node: the node.
291
292  Returns:
293    The opening bracket node or None if it couldn't find one.
294  """
295  return getattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', None)
296
297
298def SetOpeningBracket(node, bracket):
299  """Set opening bracket value for a node.
300
301  Arguments:
302    node: the node.
303    bracket: opening bracket to set.
304  """
305  setattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', bracket)
306
307
308def DumpNodeToString(node):
309  """Dump a string representation of the given node. For debugging.
310
311  Arguments:
312    node: the node.
313
314  Returns:
315    The string representation.
316  """
317  if isinstance(node, pytree.Leaf):
318    fmt = ('{name}({value}) [lineno={lineno}, column={column}, '
319           'prefix={prefix}, penalty={penalty}]')
320    return fmt.format(
321        name=NodeName(node),
322        value=_PytreeNodeRepr(node),
323        lineno=node.lineno,
324        column=node.column,
325        prefix=repr(node.prefix),
326        penalty=GetNodeAnnotation(node, Annotation.SPLIT_PENALTY, None))
327  else:
328    fmt = '{node} [{len} children] [child_indent="{indent}"]'
329    return fmt.format(
330        node=NodeName(node),
331        len=len(node.children),
332        indent=GetNodeAnnotation(node, Annotation.CHILD_INDENT))
333
334
335def _PytreeNodeRepr(node):
336  """Like pytree.Node.__repr__, but names instead of numbers for tokens."""
337  if isinstance(node, pytree.Node):
338    return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node),
339                           [_PytreeNodeRepr(c) for c in node.children])
340  if isinstance(node, pytree.Leaf):
341    return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), node.value)
342
343
344def IsCommentStatement(node):
345  return (NodeName(node) == 'simple_stmt' and
346          node.children[0].type == token.COMMENT)
347