"""
    sphinx.pycode.parser
    ~~~~~~~~~~~~~~~~~~~~

    Utilities parsing and analyzing Python code.

    :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
import inspect
import itertools
import re
import sys
import tokenize
from collections import OrderedDict
from inspect import Signature
from token import DEDENT, INDENT, NAME, NEWLINE, NUMBER, OP, STRING
from tokenize import COMMENT, NL
from typing import Any, Dict, List, Optional, Tuple

from sphinx.pycode.ast import ast  # for py37 or older
from sphinx.pycode.ast import parse, unparse

# matches a "#: text" variable comment; group 1 is the comment text
comment_re = re.compile('^\\s*#: ?(.*)\r?\n?$')
# matches a string consisting only of whitespace
indent_re = re.compile('^\\s*$')
# matches a blank line or a line holding only a comment
emptyline_re = re.compile('^\\s*(#.*)?$')


# node classes treated as assignments by ``VariableCommentPicker.visit_Expr``
if sys.version_info >= (3, 6):
    ASSIGN_NODES = (ast.Assign, ast.AnnAssign)
else:
    ASSIGN_NODES = (ast.Assign,)


def filter_whitespace(code: str) -> str:
    """Return *code* with every form feed character replaced by a space."""
    return code.replace('\f', ' ')  # replace FF (form feed) with whitespace


def get_assign_targets(node: ast.AST) -> List[ast.expr]:
    """Get list of targets from Assign and AnnAssign node."""
    if isinstance(node, ast.Assign):
        return node.targets
    else:
        # any other node is expected to carry a single ``target`` attribute
        return [node.target]  # type: ignore


def get_lvar_names(node: ast.AST, self: ast.arg = None) -> List[str]:
    """Convert assignment-AST to variable names.

    This raises `TypeError` if the assignment does not create new variable::

        ary[0] = 'foo'
        dic["bar"] = 'baz'
        # => TypeError

    :param node: the assignment target (or a part of it) to inspect
    :param self: the first argument of the enclosing function, if any; when
                 given, only names/attributes bound to it are accepted
    :raises TypeError: the target does not define a new variable
    :raises NotImplementedError: the node class is not one handled here
    """
    if self:
        self_id = self.arg

    node_name = node.__class__.__name__
    if node_name in ('Index', 'Num', 'Slice', 'Str', 'Subscript'):
        raise TypeError('%r does not create new variable' % node)
    elif node_name == 'Name':
        if self is None or node.id == self_id:  # type: ignore
            return [node.id]  # type: ignore
        else:
            raise TypeError('The assignment %r is not instance variable' % node)
    elif node_name in ('Tuple', 'List'):
        members = []
        for elt in node.elts:  # type: ignore
            try:
                members.extend(get_lvar_names(elt, self))
            except TypeError:
                # skip elements that do not define a new variable
                pass
        return members
    elif node_name == 'Attribute':
        if node.value.__class__.__name__ == 'Name' and self and node.value.id == self_id:  # type: ignore  # NOQA
            # instance variable
            return ["%s" % get_lvar_names(node.attr, self)[0]]  # type: ignore
        else:
            raise TypeError('The assignment %r is not instance variable' % node)
    elif node_name == 'str':
        # reached through the recursive call above with ``node.attr``
        return [node]  # type: ignore
    elif node_name == 'Starred':
        return get_lvar_names(node.value, self)  # type: ignore
    else:
        raise NotImplementedError('Unexpected node name %r' % node_name)


def dedent_docstring(s: str) -> str:
    """Remove common leading indentation from docstring."""
    def dummy() -> None:
        # dummy function to mock `inspect.getdoc`.
        pass

    dummy.__doc__ = s
    docstring = inspect.getdoc(dummy)
    return docstring.lstrip("\r\n").rstrip("\r\n")


class Token:
    """Better token wrapper for tokenize module."""

    def __init__(self, kind: int, value: Any, start: Tuple[int, int], end: Tuple[int, int],
                 source: str) -> None:
        """Store the five fields of a token produced by ``tokenize``."""
        self.kind = kind
        self.value = value
        self.start = start
        self.end = end
        self.source = source

    def __eq__(self, other: Any) -> bool:
        """Compare the token with a kind, a value or a (kind, value) pair.

        * ``int``: compared with the token kind
        * ``str``: compared with the token value
        * ``list``/``tuple``: compared with ``[kind, value]``
        * ``None``: never equal

        :raises ValueError: *other* is of any other type
        """
        if isinstance(other, int):
            return self.kind == other
        elif isinstance(other, str):
            return self.value == other
        elif isinstance(other, (list, tuple)):
            return [self.kind, self.value] == list(other)
        elif other is None:
            return False
        else:
            raise ValueError('Unknown value: %r' % other)

    def match(self, *conditions: Any) -> bool:
        """Return True if the token equals any of *conditions*."""
        return any(self == candidate for candidate in conditions)

    def __repr__(self) -> str:
        return '<Token kind=%r value=%r>' % (tokenize.tok_name[self.kind],
                                             self.value.strip())


class TokenProcessor:
    """Base class of parsers which walk over the token stream of source lines."""

    def __init__(self, buffers: List[str]) -> None:
        """Prepare a token generator reading from *buffers* (list of lines)."""
        lines = iter(buffers)
        self.buffers = buffers
        self.tokens = tokenize.generate_tokens(lambda: next(lines))
        self.current = None     # type: Token
        self.previous = None    # type: Token

    def get_line(self, lineno: int) -> str:
        """Returns specified line."""
        return self.buffers[lineno - 1]

    def fetch_token(self) -> Token:
        """Fetch a next token from source code.

        Returns ``None`` if sequence finished.
        """
        try:
            self.previous = self.current
            self.current = Token(*next(self.tokens))
        except StopIteration:
            self.current = None

        return self.current

    def fetch_until(self, condition: Any) -> List[Token]:
        """Fetch tokens until specified token appeared.

        .. note:: This also handles parenthesis well.
        """
        tokens = []
        while self.fetch_token():
            tokens.append(self.current)
            if self.current == condition:
                break
            elif self.current == [OP, '(']:
                tokens += self.fetch_until([OP, ')'])
            elif self.current == [OP, '{']:
                tokens += self.fetch_until([OP, '}'])
            elif self.current == [OP, '[']:
                tokens += self.fetch_until([OP, ']'])

        return tokens


class AfterCommentParser(TokenProcessor):
    """Python source code parser to pick up comment after assignment.

    This parser takes a python code starts with assignment statement,
    and returns the comments for variable if exists.
    """

    def __init__(self, lines: List[str]) -> None:
        super().__init__(lines)
        self.comment = None  # type: str

    def fetch_rvalue(self) -> List[Token]:
        """Fetch right-hand value of assignment."""
        tokens = []
        while self.fetch_token():
            tokens.append(self.current)
            if self.current == [OP, '(']:
                tokens += self.fetch_until([OP, ')'])
            elif self.current == [OP, '{']:
                tokens += self.fetch_until([OP, '}'])
            elif self.current == [OP, '[']:
                tokens += self.fetch_until([OP, ']'])
            elif self.current == INDENT:
                tokens += self.fetch_until(DEDENT)
            elif self.current == [OP, ';']:
                break
            elif self.current.kind not in (OP, NAME, NUMBER, STRING):
                break

        return tokens

    def parse(self) -> None:
        """Parse the code and obtain comment after assignment.

        The result is stored to ``self.comment``; it stays ``None`` if no
        comment token follows the assignment.
        """
        # skip lvalue (or whole of AnnAssign)
        while True:
            token = self.fetch_token()
            if token is None:
                # token stream exhausted before any '=', NEWLINE or COMMENT;
                # there is nothing to pick up
                return
            if token.match([OP, '='], NEWLINE, COMMENT):
                break

        # skip rvalue (if exists)
        if self.current == [OP, '=']:
            self.fetch_rvalue()

        if self.current == COMMENT:
            self.comment = self.current.value


class VariableCommentPicker(ast.NodeVisitor):
    """Python source code parser to pick up variable comments."""

    def __init__(self, buffers: List[str], encoding: str) -> None:
        self.counter = itertools.count()
        self.buffers = buffers
        self.encoding = encoding
        self.context = []               # type: List[str]
        self.current_classes = []       # type: List[str]
        self.current_function = None    # type: ast.FunctionDef
        self.comments = OrderedDict()   # type: Dict[Tuple[str, str], str]
        self.annotations = {}           # type: Dict[Tuple[str, str], str]
        self.previous = None            # type: ast.AST
        self.deforders = {}             # type: Dict[str, int]
        self.finals = []                # type: List[str]
        self.overloads = {}             # type: Dict[str, List[Signature]]
        self.typing = None              # type: str
        self.typing_final = None        # type: str
        self.typing_overload = None     # type: str
        super().__init__()

    def get_qualname_for(self, name: str) -> Optional[List[str]]:
        """Get qualified name for given object as a list of string."""
        if self.current_function:
            if self.current_classes and self.context[-1] == "__init__":
                # store variable comments inside __init__ method of classes
                return self.context[:-1] + [name]
            else:
                return None
        else:
            return self.context + [name]

    def add_entry(self, name: str) -> None:
        """Record the definition order of *name* (if it has a qualified name)."""
        qualname = self.get_qualname_for(name)
        if qualname:
            self.deforders[".".join(qualname)] = next(self.counter)

    def add_final_entry(self, name: str) -> None:
        """Record *name* to ``self.finals`` (if it has a qualified name)."""
        qualname = self.get_qualname_for(name)
        if qualname:
            self.finals.append(".".join(qualname))

    def add_overload_entry(self, func: ast.FunctionDef) -> None:
        """Record the signature of *func* to ``self.overloads``."""
        # avoid circular import problem
        from sphinx.util.inspect import signature_from_ast
        qualname = self.get_qualname_for(func.name)
        if qualname:
            overloads = self.overloads.setdefault(".".join(qualname), [])
            overloads.append(signature_from_ast(func))

    def add_variable_comment(self, name: str, comment: str) -> None:
        """Store *comment* for *name*, keyed by (enclosing name, name)."""
        qualname = self.get_qualname_for(name)
        if qualname:
            basename = ".".join(qualname[:-1])
            self.comments[(basename, name)] = comment

    def add_variable_annotation(self, name: str, annotation: ast.AST) -> None:
        """Store unparsed *annotation* for *name*, keyed by (enclosing name, name)."""
        qualname = self.get_qualname_for(name)
        if qualname:
            basename = ".".join(qualname[:-1])
            self.annotations[(basename, name)] = unparse(annotation)

    def is_final(self, decorators: List[ast.expr]) -> bool:
        """Return True if any of *decorators* is a known spelling of ``final``.

        The known spellings come from the imports recorded so far
        (``self.typing`` and ``self.typing_final``).
        """
        final = []
        if self.typing:
            final.append('%s.final' % self.typing)
        if self.typing_final:
            final.append(self.typing_final)

        for decorator in decorators:
            try:
                if unparse(decorator) in final:
                    return True
            except NotImplementedError:
                pass

        return False

    def is_overload(self, decorators: List[ast.expr]) -> bool:
        """Return True if any of *decorators* is a known spelling of ``overload``.

        The known spellings come from the imports recorded so far
        (``self.typing`` and ``self.typing_overload``).
        """
        overload = []
        if self.typing:
            overload.append('%s.overload' % self.typing)
        if self.typing_overload:
            overload.append(self.typing_overload)

        for decorator in decorators:
            try:
                if unparse(decorator) in overload:
                    return True
            except NotImplementedError:
                pass

        return False

    def get_self(self) -> ast.arg:
        """Returns the name of first argument if in function."""
        if self.current_function and self.current_function.args.args:
            return self.current_function.args.args[0]
        else:
            return None

    def get_line(self, lineno: int) -> str:
        """Returns specified line."""
        return self.buffers[lineno - 1]

    def visit(self, node: ast.AST) -> None:
        """Visit *node*, then update ``self.previous`` to it."""
        super().visit(node)
        self.previous = node

    def visit_Import(self, node: ast.Import) -> None:
        """Handles Import node and record it to definition orders."""
        for name in node.names:
            self.add_entry(name.asname or name.name)

            if name.name == 'typing':
                self.typing = name.asname or name.name
            elif name.name == 'typing.final':
                self.typing_final = name.asname or name.name
            elif name.name == 'typing.overload':
                self.typing_overload = name.asname or name.name

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
        """Handles ImportFrom node and record it to definition orders."""
        for name in node.names:
            self.add_entry(name.asname or name.name)

            if node.module == 'typing' and name.name == 'final':
                self.typing_final = name.asname or name.name
            elif node.module == 'typing' and name.name == 'overload':
                self.typing_overload = name.asname or name.name

    def visit_Assign(self, node: ast.Assign) -> None:
        """Handles Assign node and pick up a variable comment."""
        try:
            targets = get_assign_targets(node)
            varnames = sum([get_lvar_names(t, self=self.get_self()) for t in targets], [])  # type: List[str]  # NOQA
            current_line = self.get_line(node.lineno)
        except TypeError:
            return  # this assignment is not new definition!

        # record annotation
        if hasattr(node, 'annotation') and node.annotation:  # type: ignore
            for varname in varnames:
                self.add_variable_annotation(varname, node.annotation)  # type: ignore
        elif hasattr(node, 'type_comment') and node.type_comment:
            for varname in varnames:
                self.add_variable_annotation(varname, node.type_comment)  # type: ignore

        # check comments after assignment
        parser = AfterCommentParser([current_line[node.col_offset:]] +
                                    self.buffers[node.lineno:])
        parser.parse()
        if parser.comment and comment_re.match(parser.comment):
            for varname in varnames:
                self.add_variable_comment(varname, comment_re.sub('\\1', parser.comment))
                self.add_entry(varname)
            return

        # check comments before assignment
        if indent_re.match(current_line[:node.col_offset]):
            comment_lines = []
            # walk upwards from the line just above the assignment
            for i in range(node.lineno - 1):
                before_line = self.get_line(node.lineno - 1 - i)
                if comment_re.match(before_line):
                    comment_lines.append(comment_re.sub('\\1', before_line))
                else:
                    break

            if comment_lines:
                comment = dedent_docstring('\n'.join(reversed(comment_lines)))
                for varname in varnames:
                    self.add_variable_comment(varname, comment)
                    self.add_entry(varname)
                return

        # not commented (record deforders only)
        for varname in varnames:
            self.add_entry(varname)

    def visit_AnnAssign(self, node: ast.AST) -> None:  # Note: ast.AnnAssign not found in py35
        """Handles AnnAssign node and pick up a variable comment."""
        self.visit_Assign(node)  # type: ignore

    def visit_Expr(self, node: ast.Expr) -> None:
        """Handles Expr node and pick up a comment if string."""
        if (isinstance(self.previous, ASSIGN_NODES) and isinstance(node.value, ast.Str)):
            try:
                targets = get_assign_targets(self.previous)
                varnames = get_lvar_names(targets[0], self.get_self())
                for varname in varnames:
                    if isinstance(node.value.s, str):
                        docstring = node.value.s
                    else:
                        docstring = node.value.s.decode(self.encoding or 'utf-8')

                    self.add_variable_comment(varname, dedent_docstring(docstring))
                    self.add_entry(varname)
            except TypeError:
                pass  # this assignment is not new definition!

    def visit_Try(self, node: ast.Try) -> None:
        """Handles Try node and processes body and else-clause.

        .. note:: pycode parser ignores objects definition in except-clause.
        """
        for subnode in node.body:
            self.visit(subnode)
        for subnode in node.orelse:
            self.visit(subnode)

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        """Handles ClassDef node and set context."""
        self.current_classes.append(node.name)
        self.add_entry(node.name)
        if self.is_final(node.decorator_list):
            self.add_final_entry(node.name)
        self.context.append(node.name)
        self.previous = node
        for child in node.body:
            self.visit(child)
        self.context.pop()
        self.current_classes.pop()

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        """Handles FunctionDef node and set context."""
        # functions nested inside another function are not visited
        if self.current_function is None:
            self.add_entry(node.name)  # should be called before setting self.current_function
            if self.is_final(node.decorator_list):
                self.add_final_entry(node.name)
            if self.is_overload(node.decorator_list):
                self.add_overload_entry(node)
            self.context.append(node.name)
            self.current_function = node
            for child in node.body:
                self.visit(child)
            self.context.pop()
            self.current_function = None

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        """Handles AsyncFunctionDef node and set context."""
        self.visit_FunctionDef(node)  # type: ignore


class DefinitionFinder(TokenProcessor):
    """Python source code parser to detect location of functions,
    classes and methods.
    """

    def __init__(self, lines: List[str]) -> None:
        super().__init__(lines)
        self.decorator = None   # type: Token
        self.context = []       # type: List[str]
        self.indents = []       # type: List
        self.definitions = {}   # type: Dict[str, Tuple[str, int, int]]

    def add_definition(self, name: str, entry: Tuple[str, int, int]) -> None:
        """Add a location of definition."""
        if self.indents and self.indents[-1][0] == 'def' and entry[0] == 'def':
            # ignore definition of inner function
            pass
        else:
            self.definitions[name] = entry

    def parse(self) -> None:
        """Parse the code to obtain location of definitions."""
        while True:
            token = self.fetch_token()
            if token is None:
                break
            elif token == COMMENT:
                pass
            elif token == [OP, '@'] and (self.previous is None or
                                         self.previous.match(NEWLINE, NL, INDENT, DEDENT)):
                # remember only the first decorator of a definition
                if self.decorator is None:
                    self.decorator = token
            elif token.match([NAME, 'class']):
                self.parse_definition('class')
            elif token.match([NAME, 'def']):
                self.parse_definition('def')
            elif token == INDENT:
                self.indents.append(('other', None, None))
            elif token == DEDENT:
                self.finalize_block()

    def parse_definition(self, typ: str) -> None:
        """Parse AST of definition."""
        name = self.fetch_token()
        self.context.append(name.value)
        funcname = '.'.join(self.context)

        # a definition starts at its first decorator, if it has one
        if self.decorator:
            start_pos = self.decorator.start[0]
            self.decorator = None
        else:
            start_pos = name.start[0]

        self.fetch_until([OP, ':'])
        if self.fetch_token().match(COMMENT, NEWLINE):
            self.fetch_until(INDENT)
            self.indents.append((typ, funcname, start_pos))
        else:
            # one-liner
            self.add_definition(funcname, (typ, start_pos, name.end[0]))
            self.context.pop()

    def finalize_block(self) -> None:
        """Finalize definition block."""
        definition = self.indents.pop()
        if definition[0] != 'other':
            typ, funcname, start_pos = definition
            end_pos = self.current.end[0] - 1
            # exclude trailing blank/comment-only lines from the block
            while emptyline_re.match(self.get_line(end_pos)):
                end_pos -= 1

            self.add_definition(funcname, (typ, start_pos, end_pos))
            self.context.pop()


class Parser:
    """Python source code parser to pick up variable comments.

    This is a better wrapper for ``VariableCommentPicker``.
    """

    def __init__(self, code: str, encoding: str = 'utf-8') -> None:
        self.code = filter_whitespace(code)
        self.encoding = encoding
        self.annotations = {}       # type: Dict[Tuple[str, str], str]
        self.comments = {}          # type: Dict[Tuple[str, str], str]
        self.deforders = {}         # type: Dict[str, int]
        self.definitions = {}       # type: Dict[str, Tuple[str, int, int]]
        self.finals = []            # type: List[str]
        self.overloads = {}         # type: Dict[str, List[Signature]]

    def parse(self) -> None:
        """Parse the source code."""
        self.parse_comments()
        self.parse_definition()

    def parse_comments(self) -> None:
        """Parse the code and pick up comments."""
        tree = parse(self.code)
        picker = VariableCommentPicker(self.code.splitlines(True), self.encoding)
        picker.visit(tree)
        self.annotations = picker.annotations
        self.comments = picker.comments
        self.deforders = picker.deforders
        self.finals = picker.finals
        self.overloads = picker.overloads

    def parse_definition(self) -> None:
        """Parse the location of definitions from the code."""
        parser = DefinitionFinder(self.code.splitlines(True))
        parser.parse()
        self.definitions = parser.definitions