1# Copyright 2015 Google Inc. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""PyTreeUnwrapper - produces a list of unwrapped lines from a pytree.
15
16[for a description of what an unwrapped line is, see unwrapped_line.py]
17
18This is a pytree visitor that goes over a parse tree and produces a list of
19UnwrappedLine containers from it, each with its own depth and containing all
20the tokens that could fit on the line if there were no maximal line-length
21limitations.
22
23Note: a precondition to running this visitor and obtaining correct results is
24for the tree to have its comments spliced in as nodes. Prefixes are ignored.
25
26For most uses, the convenience function UnwrapPyTree should be sufficient.
27"""
28
29# The word "token" is overloaded within this module, so for clarity rename
30# the imported pgen2.token module.
31from lib2to3 import pytree
32from lib2to3.pgen2 import token as grammar_token
33
34from yapf.yapflib import format_token
35from yapf.yapflib import object_state
36from yapf.yapflib import pytree_utils
37from yapf.yapflib import pytree_visitor
38from yapf.yapflib import split_penalty
39from yapf.yapflib import style
40from yapf.yapflib import unwrapped_line
41
42
43def UnwrapPyTree(tree):
44  """Create and return a list of unwrapped lines from the given pytree.
45
46  Arguments:
47    tree: the top-level pytree node to unwrap.
48
49  Returns:
50    A list of UnwrappedLine objects.
51  """
52  unwrapper = PyTreeUnwrapper()
53  unwrapper.Visit(tree)
54  uwlines = unwrapper.GetUnwrappedLines()
55  uwlines.sort(key=lambda x: x.lineno)
56  return uwlines
57
58
59# Grammar tokens considered as whitespace for the purpose of unwrapping.
60_WHITESPACE_TOKENS = frozenset([
61    grammar_token.NEWLINE, grammar_token.DEDENT, grammar_token.INDENT,
62    grammar_token.ENDMARKER
63])
64
65
66class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor):
67  """PyTreeUnwrapper - see file-level docstring for detailed description.
68
69  Note: since this implements PyTreeVisitor and node names in lib2to3 are
70  underscore_separated, the visiting methods of this class are named as
71  Visit_node_name. invalid-name pragmas are added to each such method to silence
72  a style warning. This is forced on us by the usage of lib2to3, and re-munging
73  method names to make them different from actual node names sounded like a
74  confusing and brittle affair that wasn't worth it for this small & controlled
75  deviation from the style guide.
76
77  To understand the connection between visitor methods in this class, some
78  familiarity with the Python grammar is required.
79  """
80
81  def __init__(self):
82    # A list of all unwrapped lines finished visiting so far.
83    self._unwrapped_lines = []
84
85    # Builds up a "current" unwrapped line while visiting pytree nodes. Some
86    # nodes will finish a line and start a new one.
87    self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(0)
88
89    # Current indentation depth.
90    self._cur_depth = 0
91
92  def GetUnwrappedLines(self):
93    """Fetch the result of the tree walk.
94
95    Note: only call this after visiting the whole tree.
96
97    Returns:
98      A list of UnwrappedLine objects.
99    """
100    # Make sure the last line that was being populated is flushed.
101    self._StartNewLine()
102    return self._unwrapped_lines
103
104  def _StartNewLine(self):
105    """Finish current line and start a new one.
106
107    Place the currently accumulated line into the _unwrapped_lines list and
108    start a new one.
109    """
110    if self._cur_unwrapped_line.tokens:
111      self._unwrapped_lines.append(self._cur_unwrapped_line)
112      _MatchBrackets(self._cur_unwrapped_line)
113      _IdentifyParameterLists(self._cur_unwrapped_line)
114      _AdjustSplitPenalty(self._cur_unwrapped_line)
115    self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(self._cur_depth)
116
117  _STMT_TYPES = frozenset({
118      'if_stmt',
119      'while_stmt',
120      'for_stmt',
121      'try_stmt',
122      'expect_clause',
123      'with_stmt',
124      'funcdef',
125      'classdef',
126  })
127
128  # pylint: disable=invalid-name,missing-docstring
129  def Visit_simple_stmt(self, node):
130    # A 'simple_stmt' conveniently represents a non-compound Python statement,
131    # i.e. a statement that does not contain other statements.
132
133    # When compound nodes have a single statement as their suite, the parser
134    # can leave it in the tree directly without creating a suite. But we have
135    # to increase depth in these cases as well. However, don't increase the
136    # depth of we have a simple_stmt that's a comment node. This represents a
137    # standalone comment and in the case of it coming directly after the
138    # funcdef, it is a "top" comment for the whole function.
139    # TODO(eliben): add more relevant compound statements here.
140    single_stmt_suite = (
141        node.parent and pytree_utils.NodeName(node.parent) in self._STMT_TYPES)
142    is_comment_stmt = pytree_utils.IsCommentStatement(node)
143    if single_stmt_suite and not is_comment_stmt:
144      self._cur_depth += 1
145    self._StartNewLine()
146    self.DefaultNodeVisit(node)
147    if single_stmt_suite and not is_comment_stmt:
148      self._cur_depth -= 1
149
150  def _VisitCompoundStatement(self, node, substatement_names):
151    """Helper for visiting compound statements.
152
153    Python compound statements serve as containers for other statements. Thus,
154    when we encounter a new compound statement we start a new unwrapped line.
155
156    Arguments:
157      node: the node to visit.
158      substatement_names: set of node names. A compound statement will be
159        recognized as a NAME node with a name in this set.
160    """
161    for child in node.children:
162      # A pytree is structured in such a way that a single 'if_stmt' node will
163      # contain all the 'if', 'elif' and 'else' nodes as children (similar
164      # structure applies to 'while' statements, 'try' blocks, etc). Therefore,
165      # we visit all children here and create a new line before the requested
166      # set of nodes.
167      if (child.type == grammar_token.NAME and
168          child.value in substatement_names):
169        self._StartNewLine()
170      self.Visit(child)
171
172  _IF_STMT_ELEMS = frozenset({'if', 'else', 'elif'})
173
174  def Visit_if_stmt(self, node):  # pylint: disable=invalid-name
175    self._VisitCompoundStatement(node, self._IF_STMT_ELEMS)
176
177  _WHILE_STMT_ELEMS = frozenset({'while', 'else'})
178
179  def Visit_while_stmt(self, node):  # pylint: disable=invalid-name
180    self._VisitCompoundStatement(node, self._WHILE_STMT_ELEMS)
181
182  _FOR_STMT_ELEMS = frozenset({'for', 'else'})
183
184  def Visit_for_stmt(self, node):  # pylint: disable=invalid-name
185    self._VisitCompoundStatement(node, self._FOR_STMT_ELEMS)
186
187  _TRY_STMT_ELEMS = frozenset({'try', 'except', 'else', 'finally'})
188
189  def Visit_try_stmt(self, node):  # pylint: disable=invalid-name
190    self._VisitCompoundStatement(node, self._TRY_STMT_ELEMS)
191
192  _EXCEPT_STMT_ELEMS = frozenset({'except'})
193
194  def Visit_except_clause(self, node):  # pylint: disable=invalid-name
195    self._VisitCompoundStatement(node, self._EXCEPT_STMT_ELEMS)
196
197  _FUNC_DEF_ELEMS = frozenset({'def'})
198
199  def Visit_funcdef(self, node):  # pylint: disable=invalid-name
200    self._VisitCompoundStatement(node, self._FUNC_DEF_ELEMS)
201
202  def Visit_async_funcdef(self, node):  # pylint: disable=invalid-name
203    self._StartNewLine()
204    index = 0
205    for child in node.children:
206      index += 1
207      self.Visit(child)
208      if pytree_utils.NodeName(child) == 'ASYNC':
209        break
210    for child in node.children[index].children:
211      self.Visit(child)
212
213  _CLASS_DEF_ELEMS = frozenset({'class'})
214
215  def Visit_classdef(self, node):  # pylint: disable=invalid-name
216    self._VisitCompoundStatement(node, self._CLASS_DEF_ELEMS)
217
218  def Visit_async_stmt(self, node):  # pylint: disable=invalid-name
219    self._StartNewLine()
220    index = 0
221    for child in node.children:
222      index += 1
223      self.Visit(child)
224      if pytree_utils.NodeName(child) == 'ASYNC':
225        break
226    for child in node.children[index].children:
227      if pytree_utils.NodeName(child) == 'NAME' and child.value == 'else':
228        self._StartNewLine()
229      self.Visit(child)
230
231  def Visit_decorator(self, node):  # pylint: disable=invalid-name
232    for child in node.children:
233      self.Visit(child)
234      if (pytree_utils.NodeName(child) == 'COMMENT' and
235          child == node.children[0]):
236        self._StartNewLine()
237
238  def Visit_decorators(self, node):  # pylint: disable=invalid-name
239    for child in node.children:
240      self._StartNewLine()
241      self.Visit(child)
242
243  def Visit_decorated(self, node):  # pylint: disable=invalid-name
244    for child in node.children:
245      self._StartNewLine()
246      self.Visit(child)
247
248  _WITH_STMT_ELEMS = frozenset({'with'})
249
250  def Visit_with_stmt(self, node):  # pylint: disable=invalid-name
251    self._VisitCompoundStatement(node, self._WITH_STMT_ELEMS)
252
253  def Visit_suite(self, node):  # pylint: disable=invalid-name
254    # A 'suite' starts a new indentation level in Python.
255    self._cur_depth += 1
256    self._StartNewLine()
257    self.DefaultNodeVisit(node)
258    self._cur_depth -= 1
259
260  def Visit_listmaker(self, node):  # pylint: disable=invalid-name
261    _DetermineMustSplitAnnotation(node)
262    self.DefaultNodeVisit(node)
263
264  def Visit_dictsetmaker(self, node):  # pylint: disable=invalid-name
265    _DetermineMustSplitAnnotation(node)
266    self.DefaultNodeVisit(node)
267
268  def Visit_import_as_names(self, node):  # pylint: disable=invalid-name
269    if node.prev_sibling.value == '(':
270      _DetermineMustSplitAnnotation(node)
271    self.DefaultNodeVisit(node)
272
273  def Visit_testlist_gexp(self, node):  # pylint: disable=invalid-name
274    _DetermineMustSplitAnnotation(node)
275    self.DefaultNodeVisit(node)
276
277  def Visit_arglist(self, node):  # pylint: disable=invalid-name
278    _DetermineMustSplitAnnotation(node)
279    self.DefaultNodeVisit(node)
280
281  def Visit_typedargslist(self, node):  # pylint: disable=invalid-name
282    _DetermineMustSplitAnnotation(node)
283    self.DefaultNodeVisit(node)
284
285  def DefaultLeafVisit(self, leaf):
286    """Default visitor for tree leaves.
287
288    A tree leaf is always just gets appended to the current unwrapped line.
289
290    Arguments:
291      leaf: the leaf to visit.
292    """
293    if leaf.type in _WHITESPACE_TOKENS:
294      self._StartNewLine()
295    elif leaf.type != grammar_token.COMMENT or leaf.value.strip():
296      # Add non-whitespace tokens and comments that aren't empty.
297      self._cur_unwrapped_line.AppendNode(leaf)
298
299
300_BRACKET_MATCH = {')': '(', '}': '{', ']': '['}
301
302
303def _MatchBrackets(uwline):
304  """Visit the node and match the brackets.
305
306  For every open bracket ('[', '{', or '('), find the associated closing bracket
307  and "match" them up. I.e., save in the token a pointer to its associated open
308  or close bracket.
309
310  Arguments:
311    uwline: (UnwrappedLine) An unwrapped line.
312  """
313  bracket_stack = []
314  for token in uwline.tokens:
315    if token.value in pytree_utils.OPENING_BRACKETS:
316      bracket_stack.append(token)
317    elif token.value in pytree_utils.CLOSING_BRACKETS:
318      bracket_stack[-1].matching_bracket = token
319      token.matching_bracket = bracket_stack[-1]
320      bracket_stack.pop()
321
322    for bracket in bracket_stack:
323      if id(pytree_utils.GetOpeningBracket(token.node)) == id(bracket.node):
324        bracket.container_elements.append(token)
325        token.container_opening = bracket
326
327
328def _IdentifyParameterLists(uwline):
329  """Visit the node to create a state for parameter lists.
330
331  For instance, a parameter is considered an "object" with its first and last
332  token uniquely identifying the object.
333
334  Arguments:
335    uwline: (UnwrappedLine) An unwrapped line.
336  """
337  func_stack = []
338  param_stack = []
339  for tok in uwline.tokens:
340    # Identify parameter list objects.
341    if format_token.Subtype.FUNC_DEF in tok.subtypes:
342      assert tok.next_token.value == '('
343      func_stack.append(tok.next_token)
344      continue
345
346    if func_stack and tok.value == ')':
347      if tok == func_stack[-1].matching_bracket:
348        func_stack.pop()
349      continue
350
351    # Identify parameter objects.
352    if format_token.Subtype.PARAMETER_START in tok.subtypes:
353      param_stack.append(tok)
354
355    # Not "elif", a parameter could be a single token.
356    if param_stack and format_token.Subtype.PARAMETER_STOP in tok.subtypes:
357      start = param_stack.pop()
358      func_stack[-1].parameters.append(object_state.Parameter(start, tok))
359
360
361def _AdjustSplitPenalty(uwline):
362  """Visit the node and adjust the split penalties if needed.
363
364  A token shouldn't be split if it's not within a bracket pair. Mark any token
365  that's not within a bracket pair as "unbreakable".
366
367  Arguments:
368    uwline: (UnwrappedLine) An unwrapped line.
369  """
370  bracket_level = 0
371  for index, token in enumerate(uwline.tokens):
372    if index and not bracket_level:
373      pytree_utils.SetNodeAnnotation(token.node,
374                                     pytree_utils.Annotation.SPLIT_PENALTY,
375                                     split_penalty.UNBREAKABLE)
376    if token.value in pytree_utils.OPENING_BRACKETS:
377      bracket_level += 1
378    elif token.value in pytree_utils.CLOSING_BRACKETS:
379      bracket_level -= 1
380
381
382def _DetermineMustSplitAnnotation(node):
383  """Enforce a split in the list if the list ends with a comma."""
384  if style.Get('DISABLE_ENDING_COMMA_HEURISTIC'):
385    return
386  if not _ContainsComments(node):
387    token = next(node.parent.leaves())
388    if token.value == '(':
389      if sum(1 for ch in node.children
390             if pytree_utils.NodeName(ch) == 'COMMA') < 2:
391        return
392    if (not isinstance(node.children[-1], pytree.Leaf) or
393        node.children[-1].value != ','):
394      return
395  num_children = len(node.children)
396  index = 0
397  _SetMustSplitOnFirstLeaf(node.children[0])
398  while index < num_children - 1:
399    child = node.children[index]
400    if isinstance(child, pytree.Leaf) and child.value == ',':
401      next_child = node.children[index + 1]
402      if next_child.type == grammar_token.COMMENT:
403        index += 1
404        if index >= num_children - 1:
405          break
406      _SetMustSplitOnFirstLeaf(node.children[index + 1])
407    index += 1
408
409
410def _ContainsComments(node):
411  """Return True if the list has a comment in it."""
412  if isinstance(node, pytree.Leaf):
413    return node.type == grammar_token.COMMENT
414  for child in node.children:
415    if _ContainsComments(child):
416      return True
417  return False
418
419
420def _SetMustSplitOnFirstLeaf(node):
421  """Set the "must split" annotation on the first leaf node."""
422  pytree_utils.SetNodeAnnotation(
423      pytree_utils.FirstLeafNode(node), pytree_utils.Annotation.MUST_SPLIT,
424      True)
425