1# Copyright 2015 Google Inc. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14"""PyTreeUnwrapper - produces a list of unwrapped lines from a pytree. 15 16[for a description of what an unwrapped line is, see unwrapped_line.py] 17 18This is a pytree visitor that goes over a parse tree and produces a list of 19UnwrappedLine containers from it, each with its own depth and containing all 20the tokens that could fit on the line if there were no maximal line-length 21limitations. 22 23Note: a precondition to running this visitor and obtaining correct results is 24for the tree to have its comments spliced in as nodes. Prefixes are ignored. 25 26For most uses, the convenience function UnwrapPyTree should be sufficient. 27""" 28 29# The word "token" is overloaded within this module, so for clarity rename 30# the imported pgen2.token module. 31from lib2to3 import pytree 32from lib2to3.pgen2 import token as grammar_token 33 34from yapf.yapflib import format_token 35from yapf.yapflib import object_state 36from yapf.yapflib import pytree_utils 37from yapf.yapflib import pytree_visitor 38from yapf.yapflib import split_penalty 39from yapf.yapflib import style 40from yapf.yapflib import unwrapped_line 41 42 43def UnwrapPyTree(tree): 44 """Create and return a list of unwrapped lines from the given pytree. 45 46 Arguments: 47 tree: the top-level pytree node to unwrap. 48 49 Returns: 50 A list of UnwrappedLine objects. 51 """ 52 unwrapper = PyTreeUnwrapper() 53 unwrapper.Visit(tree) 54 uwlines = unwrapper.GetUnwrappedLines() 55 uwlines.sort(key=lambda x: x.lineno) 56 return uwlines 57 58 59# Grammar tokens considered as whitespace for the purpose of unwrapping. 60_WHITESPACE_TOKENS = frozenset([ 61 grammar_token.NEWLINE, grammar_token.DEDENT, grammar_token.INDENT, 62 grammar_token.ENDMARKER 63]) 64 65 66class PyTreeUnwrapper(pytree_visitor.PyTreeVisitor): 67 """PyTreeUnwrapper - see file-level docstring for detailed description. 68 69 Note: since this implements PyTreeVisitor and node names in lib2to3 are 70 underscore_separated, the visiting methods of this class are named as 71 Visit_node_name. invalid-name pragmas are added to each such method to silence 72 a style warning. This is forced on us by the usage of lib2to3, and re-munging 73 method names to make them different from actual node names sounded like a 74 confusing and brittle affair that wasn't worth it for this small & controlled 75 deviation from the style guide. 76 77 To understand the connection between visitor methods in this class, some 78 familiarity with the Python grammar is required. 79 """ 80 81 def __init__(self): 82 # A list of all unwrapped lines finished visiting so far. 83 self._unwrapped_lines = [] 84 85 # Builds up a "current" unwrapped line while visiting pytree nodes. Some 86 # nodes will finish a line and start a new one. 87 self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(0) 88 89 # Current indentation depth. 90 self._cur_depth = 0 91 92 def GetUnwrappedLines(self): 93 """Fetch the result of the tree walk. 94 95 Note: only call this after visiting the whole tree. 96 97 Returns: 98 A list of UnwrappedLine objects. 99 """ 100 # Make sure the last line that was being populated is flushed. 101 self._StartNewLine() 102 return self._unwrapped_lines 103 104 def _StartNewLine(self): 105 """Finish current line and start a new one. 106 107 Place the currently accumulated line into the _unwrapped_lines list and 108 start a new one. 109 """ 110 if self._cur_unwrapped_line.tokens: 111 self._unwrapped_lines.append(self._cur_unwrapped_line) 112 _MatchBrackets(self._cur_unwrapped_line) 113 _IdentifyParameterLists(self._cur_unwrapped_line) 114 _AdjustSplitPenalty(self._cur_unwrapped_line) 115 self._cur_unwrapped_line = unwrapped_line.UnwrappedLine(self._cur_depth) 116 117 _STMT_TYPES = frozenset({ 118 'if_stmt', 119 'while_stmt', 120 'for_stmt', 121 'try_stmt', 122 'expect_clause', 123 'with_stmt', 124 'funcdef', 125 'classdef', 126 }) 127 128 # pylint: disable=invalid-name,missing-docstring 129 def Visit_simple_stmt(self, node): 130 # A 'simple_stmt' conveniently represents a non-compound Python statement, 131 # i.e. a statement that does not contain other statements. 132 133 # When compound nodes have a single statement as their suite, the parser 134 # can leave it in the tree directly without creating a suite. But we have 135 # to increase depth in these cases as well. However, don't increase the 136 # depth of we have a simple_stmt that's a comment node. This represents a 137 # standalone comment and in the case of it coming directly after the 138 # funcdef, it is a "top" comment for the whole function. 139 # TODO(eliben): add more relevant compound statements here. 140 single_stmt_suite = ( 141 node.parent and pytree_utils.NodeName(node.parent) in self._STMT_TYPES) 142 is_comment_stmt = pytree_utils.IsCommentStatement(node) 143 if single_stmt_suite and not is_comment_stmt: 144 self._cur_depth += 1 145 self._StartNewLine() 146 self.DefaultNodeVisit(node) 147 if single_stmt_suite and not is_comment_stmt: 148 self._cur_depth -= 1 149 150 def _VisitCompoundStatement(self, node, substatement_names): 151 """Helper for visiting compound statements. 152 153 Python compound statements serve as containers for other statements. Thus, 154 when we encounter a new compound statement we start a new unwrapped line. 155 156 Arguments: 157 node: the node to visit. 158 substatement_names: set of node names. A compound statement will be 159 recognized as a NAME node with a name in this set. 160 """ 161 for child in node.children: 162 # A pytree is structured in such a way that a single 'if_stmt' node will 163 # contain all the 'if', 'elif' and 'else' nodes as children (similar 164 # structure applies to 'while' statements, 'try' blocks, etc). Therefore, 165 # we visit all children here and create a new line before the requested 166 # set of nodes. 167 if (child.type == grammar_token.NAME and 168 child.value in substatement_names): 169 self._StartNewLine() 170 self.Visit(child) 171 172 _IF_STMT_ELEMS = frozenset({'if', 'else', 'elif'}) 173 174 def Visit_if_stmt(self, node): # pylint: disable=invalid-name 175 self._VisitCompoundStatement(node, self._IF_STMT_ELEMS) 176 177 _WHILE_STMT_ELEMS = frozenset({'while', 'else'}) 178 179 def Visit_while_stmt(self, node): # pylint: disable=invalid-name 180 self._VisitCompoundStatement(node, self._WHILE_STMT_ELEMS) 181 182 _FOR_STMT_ELEMS = frozenset({'for', 'else'}) 183 184 def Visit_for_stmt(self, node): # pylint: disable=invalid-name 185 self._VisitCompoundStatement(node, self._FOR_STMT_ELEMS) 186 187 _TRY_STMT_ELEMS = frozenset({'try', 'except', 'else', 'finally'}) 188 189 def Visit_try_stmt(self, node): # pylint: disable=invalid-name 190 self._VisitCompoundStatement(node, self._TRY_STMT_ELEMS) 191 192 _EXCEPT_STMT_ELEMS = frozenset({'except'}) 193 194 def Visit_except_clause(self, node): # pylint: disable=invalid-name 195 self._VisitCompoundStatement(node, self._EXCEPT_STMT_ELEMS) 196 197 _FUNC_DEF_ELEMS = frozenset({'def'}) 198 199 def Visit_funcdef(self, node): # pylint: disable=invalid-name 200 self._VisitCompoundStatement(node, self._FUNC_DEF_ELEMS) 201 202 def Visit_async_funcdef(self, node): # pylint: disable=invalid-name 203 self._StartNewLine() 204 index = 0 205 for child in node.children: 206 index += 1 207 self.Visit(child) 208 if pytree_utils.NodeName(child) == 'ASYNC': 209 break 210 for child in node.children[index].children: 211 self.Visit(child) 212 213 _CLASS_DEF_ELEMS = frozenset({'class'}) 214 215 def Visit_classdef(self, node): # pylint: disable=invalid-name 216 self._VisitCompoundStatement(node, self._CLASS_DEF_ELEMS) 217 218 def Visit_async_stmt(self, node): # pylint: disable=invalid-name 219 self._StartNewLine() 220 index = 0 221 for child in node.children: 222 index += 1 223 self.Visit(child) 224 if pytree_utils.NodeName(child) == 'ASYNC': 225 break 226 for child in node.children[index].children: 227 if pytree_utils.NodeName(child) == 'NAME' and child.value == 'else': 228 self._StartNewLine() 229 self.Visit(child) 230 231 def Visit_decorator(self, node): # pylint: disable=invalid-name 232 for child in node.children: 233 self.Visit(child) 234 if (pytree_utils.NodeName(child) == 'COMMENT' and 235 child == node.children[0]): 236 self._StartNewLine() 237 238 def Visit_decorators(self, node): # pylint: disable=invalid-name 239 for child in node.children: 240 self._StartNewLine() 241 self.Visit(child) 242 243 def Visit_decorated(self, node): # pylint: disable=invalid-name 244 for child in node.children: 245 self._StartNewLine() 246 self.Visit(child) 247 248 _WITH_STMT_ELEMS = frozenset({'with'}) 249 250 def Visit_with_stmt(self, node): # pylint: disable=invalid-name 251 self._VisitCompoundStatement(node, self._WITH_STMT_ELEMS) 252 253 def Visit_suite(self, node): # pylint: disable=invalid-name 254 # A 'suite' starts a new indentation level in Python. 255 self._cur_depth += 1 256 self._StartNewLine() 257 self.DefaultNodeVisit(node) 258 self._cur_depth -= 1 259 260 def Visit_listmaker(self, node): # pylint: disable=invalid-name 261 _DetermineMustSplitAnnotation(node) 262 self.DefaultNodeVisit(node) 263 264 def Visit_dictsetmaker(self, node): # pylint: disable=invalid-name 265 _DetermineMustSplitAnnotation(node) 266 self.DefaultNodeVisit(node) 267 268 def Visit_import_as_names(self, node): # pylint: disable=invalid-name 269 if node.prev_sibling.value == '(': 270 _DetermineMustSplitAnnotation(node) 271 self.DefaultNodeVisit(node) 272 273 def Visit_testlist_gexp(self, node): # pylint: disable=invalid-name 274 _DetermineMustSplitAnnotation(node) 275 self.DefaultNodeVisit(node) 276 277 def Visit_arglist(self, node): # pylint: disable=invalid-name 278 _DetermineMustSplitAnnotation(node) 279 self.DefaultNodeVisit(node) 280 281 def Visit_typedargslist(self, node): # pylint: disable=invalid-name 282 _DetermineMustSplitAnnotation(node) 283 self.DefaultNodeVisit(node) 284 285 def DefaultLeafVisit(self, leaf): 286 """Default visitor for tree leaves. 287 288 A tree leaf is always just gets appended to the current unwrapped line. 289 290 Arguments: 291 leaf: the leaf to visit. 292 """ 293 if leaf.type in _WHITESPACE_TOKENS: 294 self._StartNewLine() 295 elif leaf.type != grammar_token.COMMENT or leaf.value.strip(): 296 # Add non-whitespace tokens and comments that aren't empty. 297 self._cur_unwrapped_line.AppendNode(leaf) 298 299 300_BRACKET_MATCH = {')': '(', '}': '{', ']': '['} 301 302 303def _MatchBrackets(uwline): 304 """Visit the node and match the brackets. 305 306 For every open bracket ('[', '{', or '('), find the associated closing bracket 307 and "match" them up. I.e., save in the token a pointer to its associated open 308 or close bracket. 309 310 Arguments: 311 uwline: (UnwrappedLine) An unwrapped line. 312 """ 313 bracket_stack = [] 314 for token in uwline.tokens: 315 if token.value in pytree_utils.OPENING_BRACKETS: 316 bracket_stack.append(token) 317 elif token.value in pytree_utils.CLOSING_BRACKETS: 318 bracket_stack[-1].matching_bracket = token 319 token.matching_bracket = bracket_stack[-1] 320 bracket_stack.pop() 321 322 for bracket in bracket_stack: 323 if id(pytree_utils.GetOpeningBracket(token.node)) == id(bracket.node): 324 bracket.container_elements.append(token) 325 token.container_opening = bracket 326 327 328def _IdentifyParameterLists(uwline): 329 """Visit the node to create a state for parameter lists. 330 331 For instance, a parameter is considered an "object" with its first and last 332 token uniquely identifying the object. 333 334 Arguments: 335 uwline: (UnwrappedLine) An unwrapped line. 336 """ 337 func_stack = [] 338 param_stack = [] 339 for tok in uwline.tokens: 340 # Identify parameter list objects. 341 if format_token.Subtype.FUNC_DEF in tok.subtypes: 342 assert tok.next_token.value == '(' 343 func_stack.append(tok.next_token) 344 continue 345 346 if func_stack and tok.value == ')': 347 if tok == func_stack[-1].matching_bracket: 348 func_stack.pop() 349 continue 350 351 # Identify parameter objects. 352 if format_token.Subtype.PARAMETER_START in tok.subtypes: 353 param_stack.append(tok) 354 355 # Not "elif", a parameter could be a single token. 356 if param_stack and format_token.Subtype.PARAMETER_STOP in tok.subtypes: 357 start = param_stack.pop() 358 func_stack[-1].parameters.append(object_state.Parameter(start, tok)) 359 360 361def _AdjustSplitPenalty(uwline): 362 """Visit the node and adjust the split penalties if needed. 363 364 A token shouldn't be split if it's not within a bracket pair. Mark any token 365 that's not within a bracket pair as "unbreakable". 366 367 Arguments: 368 uwline: (UnwrappedLine) An unwrapped line. 369 """ 370 bracket_level = 0 371 for index, token in enumerate(uwline.tokens): 372 if index and not bracket_level: 373 pytree_utils.SetNodeAnnotation(token.node, 374 pytree_utils.Annotation.SPLIT_PENALTY, 375 split_penalty.UNBREAKABLE) 376 if token.value in pytree_utils.OPENING_BRACKETS: 377 bracket_level += 1 378 elif token.value in pytree_utils.CLOSING_BRACKETS: 379 bracket_level -= 1 380 381 382def _DetermineMustSplitAnnotation(node): 383 """Enforce a split in the list if the list ends with a comma.""" 384 if style.Get('DISABLE_ENDING_COMMA_HEURISTIC'): 385 return 386 if not _ContainsComments(node): 387 token = next(node.parent.leaves()) 388 if token.value == '(': 389 if sum(1 for ch in node.children 390 if pytree_utils.NodeName(ch) == 'COMMA') < 2: 391 return 392 if (not isinstance(node.children[-1], pytree.Leaf) or 393 node.children[-1].value != ','): 394 return 395 num_children = len(node.children) 396 index = 0 397 _SetMustSplitOnFirstLeaf(node.children[0]) 398 while index < num_children - 1: 399 child = node.children[index] 400 if isinstance(child, pytree.Leaf) and child.value == ',': 401 next_child = node.children[index + 1] 402 if next_child.type == grammar_token.COMMENT: 403 index += 1 404 if index >= num_children - 1: 405 break 406 _SetMustSplitOnFirstLeaf(node.children[index + 1]) 407 index += 1 408 409 410def _ContainsComments(node): 411 """Return True if the list has a comment in it.""" 412 if isinstance(node, pytree.Leaf): 413 return node.type == grammar_token.COMMENT 414 for child in node.children: 415 if _ContainsComments(child): 416 return True 417 return False 418 419 420def _SetMustSplitOnFirstLeaf(node): 421 """Set the "must split" annotation on the first leaf node.""" 422 pytree_utils.SetNodeAnnotation( 423 pytree_utils.FirstLeafNode(node), pytree_utils.Annotation.MUST_SPLIT, 424 True) 425