1# Copyright 2015 Google Inc. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14"""pytree-related utilities. 15 16This module collects various utilities related to the parse trees produced by 17the lib2to3 library. 18 19 NodeName(): produces a string name for pytree nodes. 20 ParseCodeToTree(): convenience wrapper around lib2to3 interfaces to parse 21 a given string with code to a pytree. 22 InsertNodeBefore(): insert a node before another in a pytree. 23 InsertNodeAfter(): insert a node after another in a pytree. 24 {Get,Set}NodeAnnotation(): manage custom annotations on pytree nodes. 25""" 26 27import ast 28 29from lib2to3 import pygram 30from lib2to3 import pytree 31from lib2to3.pgen2 import driver 32from lib2to3.pgen2 import parse 33from lib2to3.pgen2 import token 34 35# TODO(eliben): We may want to get rid of this filtering at some point once we 36# have a better understanding of what information we need from the tree. Then, 37# these tokens may be filtered out from the tree before the tree gets to the 38# unwrapper. 39NONSEMANTIC_TOKENS = frozenset(['DEDENT', 'INDENT', 'NEWLINE', 'ENDMARKER']) 40 41OPENING_BRACKETS = frozenset({'(', '[', '{'}) 42CLOSING_BRACKETS = frozenset({')', ']', '}'}) 43 44 45class Annotation(object): 46 """Annotation names associated with pytrees.""" 47 CHILD_INDENT = 'child_indent' 48 NEWLINES = 'newlines' 49 MUST_SPLIT = 'must_split' 50 SPLIT_PENALTY = 'split_penalty' 51 SUBTYPE = 'subtype' 52 53 54def NodeName(node): 55 """Produce a string name for a given node. 56 57 For a Leaf this is the token name, and for a Node this is the type. 58 59 Arguments: 60 node: a tree node 61 62 Returns: 63 Name as a string. 64 """ 65 # Nodes with values < 256 are tokens. Values >= 256 are grammar symbols. 66 if node.type < 256: 67 return token.tok_name[node.type] 68 else: 69 return pygram.python_grammar.number2symbol[node.type] 70 71 72def FirstLeafNode(node): 73 if isinstance(node, pytree.Leaf): 74 return node 75 return FirstLeafNode(node.children[0]) 76 77 78def LastLeafNode(node): 79 if isinstance(node, pytree.Leaf): 80 return node 81 return LastLeafNode(node.children[-1]) 82 83 84# lib2to3 thoughtfully provides pygram.python_grammar_no_print_statement for 85# parsing Python 3 code that wouldn't parse otherwise (when 'print' is used in a 86# context where a keyword is disallowed). 87# It forgets to do the same for 'exec' though. Luckily, Python is amenable to 88# monkey-patching. 89_GRAMMAR_FOR_PY3 = pygram.python_grammar_no_print_statement.copy() 90del _GRAMMAR_FOR_PY3.keywords['exec'] 91 92_GRAMMAR_FOR_PY2 = pygram.python_grammar.copy() 93del _GRAMMAR_FOR_PY2.keywords['nonlocal'] 94 95 96def ParseCodeToTree(code): 97 """Parse the given code to a lib2to3 pytree. 98 99 Arguments: 100 code: a string with the code to parse. 101 102 Raises: 103 SyntaxError if the code is invalid syntax. 104 parse.ParseError if some other parsing failure. 105 106 Returns: 107 The root node of the parsed tree. 108 """ 109 # This function is tiny, but the incantation for invoking the parser correctly 110 # is sufficiently magical to be worth abstracting away. 111 try: 112 # Try to parse using a Python 3 grammar, which is more permissive (print and 113 # exec are not keywords). 114 parser_driver = driver.Driver(_GRAMMAR_FOR_PY3, convert=pytree.convert) 115 tree = parser_driver.parse_string(code, debug=False) 116 except parse.ParseError: 117 # Now try to parse using a Python 2 grammar; If this fails, then 118 # there's something else wrong with the code. 119 try: 120 parser_driver = driver.Driver(_GRAMMAR_FOR_PY2, convert=pytree.convert) 121 tree = parser_driver.parse_string(code, debug=False) 122 except parse.ParseError: 123 # Raise a syntax error if the code is invalid python syntax. 124 try: 125 ast.parse(code) 126 except SyntaxError as e: 127 raise e 128 else: 129 raise 130 return _WrapEndMarker(tree) 131 132 133def _WrapEndMarker(tree): 134 """Wrap a single ENDMARKER token in a "file_input" node. 135 136 Arguments: 137 tree: (pytree.Node) The root node of the parsed tree. 138 139 Returns: 140 The root node of the parsed tree. If the tree is a single ENDMARKER node, 141 then that node is wrapped in a "file_input" node. That will ensure we don't 142 skip comments attached to that node. 143 """ 144 if isinstance(tree, pytree.Leaf) and tree.type == token.ENDMARKER: 145 return pytree.Node(pygram.python_symbols.file_input, [tree]) 146 return tree 147 148 149def InsertNodesBefore(new_nodes, target): 150 """Insert new_nodes before the given target location in the tree. 151 152 Arguments: 153 new_nodes: a sequence of new nodes to insert (the nodes should not be in the 154 tree). 155 target: the target node before which the new node node will be inserted. 156 157 Raises: 158 RuntimeError: if the tree is corrupted, or the insertion would corrupt it. 159 """ 160 for node in new_nodes: 161 _InsertNodeAt(node, target, after=False) 162 163 164def InsertNodesAfter(new_nodes, target): 165 """Insert new_nodes after the given target location in the tree. 166 167 Arguments: 168 new_nodes: a sequence of new nodes to insert (the nodes should not be in the 169 tree). 170 target: the target node after which the new node node will be inserted. 171 172 Raises: 173 RuntimeError: if the tree is corrupted, or the insertion would corrupt it. 174 """ 175 for node in reversed(new_nodes): 176 _InsertNodeAt(node, target, after=True) 177 178 179def _InsertNodeAt(new_node, target, after=False): 180 """Underlying implementation for node insertion. 181 182 Arguments: 183 new_node: a new node to insert (this node should not be in the tree). 184 target: the target node. 185 after: if True, new_node is inserted after target. Otherwise, it's inserted 186 before target. 187 188 Returns: 189 nothing 190 191 Raises: 192 RuntimeError: if the tree is corrupted, or the insertion would corrupt it. 193 """ 194 195 # Protect against attempts to insert nodes which already belong to some tree. 196 if new_node.parent is not None: 197 raise RuntimeError('inserting node which already has a parent', 198 (new_node, new_node.parent)) 199 200 # The code here is based on pytree.Base.next_sibling 201 parent_of_target = target.parent 202 if parent_of_target is None: 203 raise RuntimeError('expected target node to have a parent', (target,)) 204 205 for i, child in enumerate(parent_of_target.children): 206 if child is target: 207 insertion_index = i + 1 if after else i 208 parent_of_target.insert_child(insertion_index, new_node) 209 return 210 211 raise RuntimeError('unable to find insertion point for target node', 212 (target,)) 213 214 215# The following constant and functions implement a simple custom annotation 216# mechanism for pytree nodes. We attach new attributes to nodes. Each attribute 217# is prefixed with _NODE_ANNOTATION_PREFIX. These annotations should only be 218# managed through GetNodeAnnotation and SetNodeAnnotation. 219_NODE_ANNOTATION_PREFIX = '_yapf_annotation_' 220 221 222def CopyYapfAnnotations(src, dst): 223 """Copy all YAPF annotations from the source node to the destination node. 224 225 Arguments: 226 src: the source node. 227 dst: the destination node. 228 """ 229 for annotation in dir(src): 230 if annotation.startswith(_NODE_ANNOTATION_PREFIX): 231 setattr(dst, annotation, getattr(src, annotation, None)) 232 233 234def GetNodeAnnotation(node, annotation, default=None): 235 """Get annotation value from a node. 236 237 Arguments: 238 node: the node. 239 annotation: annotation name - a string. 240 default: the default value to return if there's no annotation. 241 242 Returns: 243 Value of the annotation in the given node. If the node doesn't have this 244 particular annotation name yet, returns default. 245 """ 246 return getattr(node, _NODE_ANNOTATION_PREFIX + annotation, default) 247 248 249def SetNodeAnnotation(node, annotation, value): 250 """Set annotation value on a node. 251 252 Arguments: 253 node: the node. 254 annotation: annotation name - a string. 255 value: annotation value to set. 256 """ 257 setattr(node, _NODE_ANNOTATION_PREFIX + annotation, value) 258 259 260def AppendNodeAnnotation(node, annotation, value): 261 """Appends an annotation value to a list of annotations on the node. 262 263 Arguments: 264 node: the node. 265 annotation: annotation name - a string. 266 value: annotation value to set. 267 """ 268 attr = GetNodeAnnotation(node, annotation, set()) 269 attr.add(value) 270 SetNodeAnnotation(node, annotation, attr) 271 272 273def RemoveSubtypeAnnotation(node, value): 274 """Removes an annotation value from the subtype annotations on the node. 275 276 Arguments: 277 node: the node. 278 value: annotation value to remove. 279 """ 280 attr = GetNodeAnnotation(node, Annotation.SUBTYPE) 281 if attr and value in attr: 282 attr.remove(value) 283 SetNodeAnnotation(node, Annotation.SUBTYPE, attr) 284 285 286def GetOpeningBracket(node): 287 """Get opening bracket value from a node. 288 289 Arguments: 290 node: the node. 291 292 Returns: 293 The opening bracket node or None if it couldn't find one. 294 """ 295 return getattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', None) 296 297 298def SetOpeningBracket(node, bracket): 299 """Set opening bracket value for a node. 300 301 Arguments: 302 node: the node. 303 bracket: opening bracket to set. 304 """ 305 setattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', bracket) 306 307 308def DumpNodeToString(node): 309 """Dump a string representation of the given node. For debugging. 310 311 Arguments: 312 node: the node. 313 314 Returns: 315 The string representation. 316 """ 317 if isinstance(node, pytree.Leaf): 318 fmt = ('{name}({value}) [lineno={lineno}, column={column}, ' 319 'prefix={prefix}, penalty={penalty}]') 320 return fmt.format( 321 name=NodeName(node), 322 value=_PytreeNodeRepr(node), 323 lineno=node.lineno, 324 column=node.column, 325 prefix=repr(node.prefix), 326 penalty=GetNodeAnnotation(node, Annotation.SPLIT_PENALTY, None)) 327 else: 328 fmt = '{node} [{len} children] [child_indent="{indent}"]' 329 return fmt.format( 330 node=NodeName(node), 331 len=len(node.children), 332 indent=GetNodeAnnotation(node, Annotation.CHILD_INDENT)) 333 334 335def _PytreeNodeRepr(node): 336 """Like pytree.Node.__repr__, but names instead of numbers for tokens.""" 337 if isinstance(node, pytree.Node): 338 return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), 339 [_PytreeNodeRepr(c) for c in node.children]) 340 if isinstance(node, pytree.Leaf): 341 return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), node.value) 342 343 344def IsCommentStatement(node): 345 return (NodeName(node) == 'simple_stmt' and 346 node.children[0].type == token.COMMENT) 347