1"""
2    sphinx.pycode.parser
3    ~~~~~~~~~~~~~~~~~~~~
4
5    Utilities parsing and analyzing Python code.
6
7    :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
8    :license: BSD, see LICENSE for details.
9"""
10import inspect
11import itertools
12import re
13import sys
14import tokenize
15from collections import OrderedDict
16from inspect import Signature
17from token import DEDENT, INDENT, NAME, NEWLINE, NUMBER, OP, STRING
18from tokenize import COMMENT, NL
19from typing import Any, Dict, List, Optional, Tuple
20
21from sphinx.pycode.ast import ast  # for py37 or older
22from sphinx.pycode.ast import parse, unparse
23
# Matches a ``#:`` documentation comment (optionally indented); group 1 is the
# comment text without the marker and without the trailing line ending.
comment_re = re.compile('^\\s*#: ?(.*)\r?\n?$')
# Matches a string consisting of whitespace only (used to check that nothing
# but indentation precedes an assignment on its line).
indent_re = re.compile('^\\s*$')
# Matches a blank line or a line that holds nothing but a ``#`` comment.
emptyline_re = re.compile('^\\s*(#.*)?$')


# AST node classes that represent an assignment statement.  Always a tuple so
# that both branches have the same type.
if sys.version_info >= (3, 6):
    ASSIGN_NODES = (ast.Assign, ast.AnnAssign)
else:
    # The trailing comma matters: ``(ast.Assign)`` is just the class itself.
    ASSIGN_NODES = (ast.Assign,)
33
34
def filter_whitespace(code: str) -> str:
    """Return *code* with every form feed (FF) character turned into a space."""
    form_feed = '\f'
    return ' '.join(code.split(form_feed))
37
38
def get_assign_targets(node: ast.AST) -> List[ast.expr]:
    """Return the assignment targets of an Assign or AnnAssign node as a list.

    An ``Assign`` node already carries a list (``targets``), which is returned
    as is.  Any other node is expected to have a single ``target`` attribute,
    which is wrapped in a new one-element list.
    """
    if not isinstance(node, ast.Assign):
        return [node.target]  # type: ignore
    return node.targets
45
46
def get_lvar_names(node: ast.AST, self: ast.arg = None) -> List[str]:
    """Collect the variable names introduced by an assignment target.

    :param node: the target node (``Name``, ``Tuple``, ``List``, ``Attribute``,
                 ``Starred``) or, for the attribute case, a plain ``str``.
    :param self: the first argument of the enclosing function, if any.  When
                 given, only attributes of that argument (``self.xxx``) and the
                 argument's own name are accepted.
    :raises TypeError: if the target does not define a new variable, e.g.::

        ary[0] = 'foo'
        dic["bar"] = 'baz'
        # => TypeError

    :raises NotImplementedError: for any other kind of node.
    """
    if self:
        self_id = self.arg

    kind = type(node).__name__

    if kind in ('Index', 'Num', 'Slice', 'Str', 'Subscript'):
        raise TypeError('%r does not create new variable' % node)

    if kind == 'Name':
        if self is not None and node.id != self_id:  # type: ignore
            raise TypeError('The assignment %r is not instance variable' % node)
        return [node.id]  # type: ignore

    if kind in ('Tuple', 'List'):
        names = []
        for element in node.elts:  # type: ignore
            try:
                names += get_lvar_names(element, self)
            except TypeError:
                # skip members which do not define a new variable
                continue
        return names

    if kind == 'Attribute':
        owner = node.value  # type: ignore
        if owner.__class__.__name__ == 'Name' and self and owner.id == self_id:
            # instance variable: the attribute name is a plain str
            return ["%s" % get_lvar_names(node.attr, self)[0]]  # type: ignore
        raise TypeError('The assignment %r is not instance variable' % node)

    if kind == 'str':
        return [node]  # type: ignore

    if kind == 'Starred':
        return get_lvar_names(node.value, self)  # type: ignore

    raise NotImplementedError('Unexpected node name %r' % kind)
87
88
def dedent_docstring(s: str) -> str:
    """Strip the common leading indentation from docstring *s*.

    The text is cleaned the same way ``inspect.getdoc`` cleans a docstring,
    then any CR/LF characters left at either end are removed.
    """
    cleaned = inspect.cleandoc(s)
    return cleaned.strip("\r\n")
98
99
class Token:
    """Wrapper around a ``tokenize`` token that supports flexible comparison.

    A token can be compared with ``==`` against a token type (int), a token
    string (str) or a ``[type, string]`` pair (list or tuple).
    """

    def __init__(self, kind: int, value: Any, start: Tuple[int, int], end: Tuple[int, int],
                 source: str) -> None:
        self.kind, self.value = kind, value
        self.start, self.end = start, end
        self.source = source

    def __eq__(self, other: Any) -> bool:
        """Compare by kind (int), value (str) or ``[kind, value]`` (list/tuple).

        ``None`` never matches; any other type raises ``ValueError``.
        """
        if isinstance(other, int):
            return self.kind == other
        if isinstance(other, str):
            return self.value == other
        if isinstance(other, (list, tuple)):
            return [self.kind, self.value] == list(other)
        if other is None:
            return False
        raise ValueError('Unknown value: %r' % other)

    def match(self, *conditions: Any) -> bool:
        """Return True if the token equals at least one of *conditions*."""
        for candidate in conditions:
            if self == candidate:
                return True
        return False

    def __repr__(self) -> str:
        kind_name = tokenize.tok_name[self.kind]
        return '<Token kind=%r value=%r>' % (kind_name, self.value.strip())
129
130
class TokenProcessor:
    """Base class that steps through the tokens of a list of source lines."""

    def __init__(self, buffers: List[str]) -> None:
        self.buffers = buffers
        # ``tokenize`` wants a readline-like callable; hand out one line per call
        line_iter = iter(buffers)
        self.tokens = tokenize.generate_tokens(line_iter.__next__)
        self.current = None     # type: Token
        self.previous = None    # type: Token

    def get_line(self, lineno: int) -> str:
        """Return the source line with the given 1-based line number."""
        index = lineno - 1
        return self.buffers[index]

    def fetch_token(self) -> Token:
        """Advance to the next token and return it.

        The token that was current becomes ``self.previous``.  Returns
        ``None`` (and sets ``self.current`` to ``None``) when the token
        sequence is finished.
        """
        self.previous = self.current
        try:
            raw = next(self.tokens)
        except StopIteration:
            self.current = None
        else:
            self.current = Token(*raw)

        return self.current

    def fetch_until(self, condition: Any) -> List[Token]:
        """Consume tokens up to and including the one equal to *condition*.

        .. note:: Bracketed groups are consumed as a whole: after an opening
                  ``(``, ``{`` or ``[`` everything up to the matching closer
                  is fetched before *condition* is checked again.
        """
        closers = {'(': ')', '{': '}', '[': ']'}
        collected = []
        while self.fetch_token():
            token = self.current
            collected.append(token)
            if token == condition:
                break
            if token.kind == OP and token.value in closers:
                collected += self.fetch_until([OP, closers[token.value]])

        return collected
174
175
class AfterCommentParser(TokenProcessor):
    """Python source code parser to pick up comment after assignment.

    This parser takes a python code starts with assignment statement,
    and returns the comments for variable if exists.  After :meth:`parse`,
    the raw comment token text (if any) is available as ``self.comment``.
    """

    def __init__(self, lines: List[str]) -> None:
        super().__init__(lines)
        self.comment = None  # type: str

    def fetch_rvalue(self) -> List[Token]:
        """Fetch right-hand value of assignment.

        Consumes tokens until a ``;`` or a token that is not an operator,
        name, number or string (e.g. a comment or newline).  Bracketed groups
        and indented blocks are consumed as a whole.  Returns the consumed
        tokens, including the terminating one.
        """
        tokens = []
        while self.fetch_token():
            tokens.append(self.current)
            if self.current == [OP, '(']:
                tokens += self.fetch_until([OP, ')'])
            elif self.current == [OP, '{']:
                tokens += self.fetch_until([OP, '}'])
            elif self.current == [OP, '[']:
                tokens += self.fetch_until([OP, ']'])
            elif self.current == INDENT:
                tokens += self.fetch_until(DEDENT)
            elif self.current == [OP, ';']:
                break
            elif self.current.kind not in (OP, NAME, NUMBER, STRING):
                break

        return tokens

    def parse(self) -> None:
        """Parse the code and obtain comment after assignment.

        If the token stream ends before an ``=``, a NEWLINE or a COMMENT is
        found, parsing stops and ``self.comment`` is left unset.
        """
        # skip lvalue (or whole of AnnAssign)
        while True:
            token = self.fetch_token()
            if token is None:
                # Stream exhausted (e.g. empty input): there is nothing to
                # pick up, and calling ``.match`` on None would fail.
                return
            if token.match([OP, '='], NEWLINE, COMMENT):
                break

        # skip rvalue (if exists)
        if self.current == [OP, '=']:
            self.fetch_rvalue()

        # fetch_rvalue() may have run off the end, leaving ``current`` as None
        if self.current is not None and self.current == COMMENT:
            self.comment = self.current.value
219
220
class VariableCommentPicker(ast.NodeVisitor):
    """Python source code parser to pick up variable comments.

    Walks an AST together with the original source lines and records:

    * ``comments``: comment/docstring text per variable,
    * ``annotations``: unparsed annotation (or type comment) per variable,
    * ``deforders``: the order in which names were defined,
    * ``finals``: names decorated with ``typing.final``,
    * ``overloads``: signatures of functions decorated with ``typing.overload``.
    """

    def __init__(self, buffers: List[str], encoding: str) -> None:
        # source of the increasing indices stored in ``deforders``
        self.counter = itertools.count()
        # source lines; indexed by (lineno - 1)
        self.buffers = buffers
        self.encoding = encoding
        # names of the enclosing classes/functions of the node being visited
        self.context = []               # type: List[str]
        self.current_classes = []       # type: List[str]
        # the function being visited (nested functions are not visited)
        self.current_function = None    # type: ast.FunctionDef
        # keyed by (dotted name of the enclosing scope, variable name)
        self.comments = OrderedDict()   # type: Dict[Tuple[str, str], str]
        self.annotations = {}           # type: Dict[Tuple[str, str], str]
        # the most recently visited node (see ``visit``)
        self.previous = None            # type: ast.AST
        # keyed by dotted qualified name
        self.deforders = {}             # type: Dict[str, int]
        self.finals = []                # type: List[str]
        self.overloads = {}             # type: Dict[str, List[Signature]]
        # local names under which ``typing``, ``typing.final`` and
        # ``typing.overload`` were imported (None if not imported)
        self.typing = None              # type: str
        self.typing_final = None        # type: str
        self.typing_overload = None     # type: str
        super().__init__()

    def get_qualname_for(self, name: str) -> Optional[List[str]]:
        """Get qualified name for given object as a list of string.

        Returns ``None`` for names defined inside a function, except inside
        the ``__init__`` method of a class, where the name is qualified as a
        member of the enclosing scope of ``__init__``.
        """
        if self.current_function:
            if self.current_classes and self.context[-1] == "__init__":
                # store variable comments inside __init__ method of classes
                return self.context[:-1] + [name]
            else:
                return None
        else:
            return self.context + [name]

    def add_entry(self, name: str) -> None:
        """Record the definition order of *name* (if it has a qualified name)."""
        qualname = self.get_qualname_for(name)
        if qualname:
            self.deforders[".".join(qualname)] = next(self.counter)

    def add_final_entry(self, name: str) -> None:
        """Record *name* as final (if it has a qualified name)."""
        qualname = self.get_qualname_for(name)
        if qualname:
            self.finals.append(".".join(qualname))

    def add_overload_entry(self, func: ast.FunctionDef) -> None:
        """Append the signature of *func* to the overloads of its qualified name."""
        # avoid circular import problem
        from sphinx.util.inspect import signature_from_ast
        qualname = self.get_qualname_for(func.name)
        if qualname:
            overloads = self.overloads.setdefault(".".join(qualname), [])
            overloads.append(signature_from_ast(func))

    def add_variable_comment(self, name: str, comment: str) -> None:
        """Store *comment* for variable *name* under its enclosing scope."""
        qualname = self.get_qualname_for(name)
        if qualname:
            basename = ".".join(qualname[:-1])
            self.comments[(basename, name)] = comment

    def add_variable_annotation(self, name: str, annotation: ast.AST) -> None:
        """Store the unparsed *annotation* for variable *name*."""
        qualname = self.get_qualname_for(name)
        if qualname:
            basename = ".".join(qualname[:-1])
            self.annotations[(basename, name)] = unparse(annotation)

    def is_final(self, decorators: List[ast.expr]) -> bool:
        """Return True if any of *decorators* is ``typing.final``.

        Only the names under which ``typing`` / ``final`` were imported in the
        visited code so far are recognized.
        """
        final = []
        if self.typing:
            final.append('%s.final' % self.typing)
        if self.typing_final:
            final.append(self.typing_final)

        for decorator in decorators:
            try:
                if unparse(decorator) in final:
                    return True
            except NotImplementedError:
                # decorator expression could not be unparsed; treat as no match
                pass

        return False

    def is_overload(self, decorators: List[ast.expr]) -> bool:
        """Return True if any of *decorators* is ``typing.overload``.

        Only the names under which ``typing`` / ``overload`` were imported in
        the visited code so far are recognized.
        """
        overload = []
        if self.typing:
            overload.append('%s.overload' % self.typing)
        if self.typing_overload:
            overload.append(self.typing_overload)

        for decorator in decorators:
            try:
                if unparse(decorator) in overload:
                    return True
            except NotImplementedError:
                # decorator expression could not be unparsed; treat as no match
                pass

        return False

    def get_self(self) -> ast.arg:
        """Returns the first argument of the current function, if any.

        Returns ``None`` when not inside a function or when the function has
        no positional arguments.
        """
        if self.current_function and self.current_function.args.args:
            return self.current_function.args.args[0]
        else:
            return None

    def get_line(self, lineno: int) -> str:
        """Returns specified line (*lineno* is 1-based)."""
        return self.buffers[lineno - 1]

    def visit(self, node: ast.AST) -> None:
        """Visit *node*, then remember it as ``self.previous``."""
        super().visit(node)
        self.previous = node

    def visit_Import(self, node: ast.Import) -> None:
        """Handles Import node and record it to definition orders.

        Also remembers the local name bound to ``typing`` (and to
        ``typing.final`` / ``typing.overload``) for decorator detection.
        """
        for name in node.names:
            self.add_entry(name.asname or name.name)

            if name.name == 'typing':
                self.typing = name.asname or name.name
            elif name.name == 'typing.final':
                self.typing_final = name.asname or name.name
            elif name.name == 'typing.overload':
                self.typing_overload = name.asname or name.name

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
        """Handles ImportFrom node and record it to definition orders.

        Also remembers the local names bound to ``final`` and ``overload``
        when they are imported from ``typing``.
        """
        for name in node.names:
            self.add_entry(name.asname or name.name)

            if node.module == 'typing' and name.name == 'final':
                self.typing_final = name.asname or name.name
            elif node.module == 'typing' and name.name == 'overload':
                self.typing_overload = name.asname or name.name

    def visit_Assign(self, node: ast.Assign) -> None:
        """Handles Assign node and pick up a variable comment.

        A ``#:`` comment on the same line after the assignment takes
        precedence; otherwise consecutive ``#:`` comment lines directly above
        the assignment are used.  The definition order of every assigned
        name is recorded in all cases.
        """
        try:
            targets = get_assign_targets(node)
            varnames = sum([get_lvar_names(t, self=self.get_self()) for t in targets], [])  # type: List[str]  # NOQA
            current_line = self.get_line(node.lineno)
        except TypeError:
            return  # this assignment is not new definition!

        # record annotation
        if hasattr(node, 'annotation') and node.annotation:  # type: ignore
            for varname in varnames:
                self.add_variable_annotation(varname, node.annotation)  # type: ignore
        elif hasattr(node, 'type_comment') and node.type_comment:
            for varname in varnames:
                self.add_variable_annotation(varname, node.type_comment)  # type: ignore

        # check comments after assignment
        # (tokenize from the start of the statement through the rest of the file)
        parser = AfterCommentParser([current_line[node.col_offset:]] +
                                    self.buffers[node.lineno:])
        parser.parse()
        if parser.comment and comment_re.match(parser.comment):
            for varname in varnames:
                self.add_variable_comment(varname, comment_re.sub('\\1', parser.comment))
                self.add_entry(varname)
            return

        # check comments before assignment
        # (only if nothing but whitespace precedes the statement on its line)
        if indent_re.match(current_line[:node.col_offset]):
            comment_lines = []
            # walk upwards from the line just above the assignment
            for i in range(node.lineno - 1):
                before_line = self.get_line(node.lineno - 1 - i)
                if comment_re.match(before_line):
                    comment_lines.append(comment_re.sub('\\1', before_line))
                else:
                    break

            if comment_lines:
                # lines were collected bottom-up; restore source order
                comment = dedent_docstring('\n'.join(reversed(comment_lines)))
                for varname in varnames:
                    self.add_variable_comment(varname, comment)
                    self.add_entry(varname)
                return

        # not commented (record deforders only)
        for varname in varnames:
            self.add_entry(varname)

    def visit_AnnAssign(self, node: ast.AST) -> None:  # Note: ast.AnnAssign not found in py35
        """Handles AnnAssign node and pick up a variable comment."""
        self.visit_Assign(node)  # type: ignore

    def visit_Expr(self, node: ast.Expr) -> None:
        """Handles Expr node and pick up a comment if string.

        A string expression that directly follows an assignment is taken as
        the docstring of the variables of that assignment's first target.
        """
        if (isinstance(self.previous, ASSIGN_NODES) and isinstance(node.value, ast.Str)):
            try:
                targets = get_assign_targets(self.previous)
                varnames = get_lvar_names(targets[0], self.get_self())
                for varname in varnames:
                    if isinstance(node.value.s, str):
                        docstring = node.value.s
                    else:
                        docstring = node.value.s.decode(self.encoding or 'utf-8')

                    self.add_variable_comment(varname, dedent_docstring(docstring))
                    self.add_entry(varname)
            except TypeError:
                pass  # this assignment is not new definition!

    def visit_Try(self, node: ast.Try) -> None:
        """Handles Try node and processes body and else-clause.

        .. note:: pycode parser ignores objects definition in except-clause.
        """
        for subnode in node.body:
            self.visit(subnode)
        for subnode in node.orelse:
            self.visit(subnode)

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        """Handles ClassDef node and set context.

        The class itself is recorded in the enclosing scope before its name
        is pushed onto the context for visiting the class body.
        """
        self.current_classes.append(node.name)
        self.add_entry(node.name)
        if self.is_final(node.decorator_list):
            self.add_final_entry(node.name)
        self.context.append(node.name)
        # so that the first statement of the body does not see a stale node
        self.previous = node
        for child in node.body:
            self.visit(child)
        self.context.pop()
        self.current_classes.pop()

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        """Handles FunctionDef node and set context.

        Functions nested inside another function are skipped entirely.
        """
        if self.current_function is None:
            self.add_entry(node.name)  # should be called before setting self.current_function
            if self.is_final(node.decorator_list):
                self.add_final_entry(node.name)
            if self.is_overload(node.decorator_list):
                self.add_overload_entry(node)
            self.context.append(node.name)
            self.current_function = node
            for child in node.body:
                self.visit(child)
            self.context.pop()
            self.current_function = None

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        """Handles AsyncFunctionDef node and set context."""
        self.visit_FunctionDef(node)  # type: ignore
463
464
class DefinitionFinder(TokenProcessor):
    """Token-based scanner that locates functions, classes and methods.

    After :meth:`parse`, ``definitions`` maps each dotted name to a
    ``(kind, first_line, last_line)`` tuple, where *kind* is ``'class'`` or
    ``'def'``.
    """

    def __init__(self, lines: List[str]) -> None:
        super().__init__(lines)
        self.decorator = None   # type: Token
        self.context = []       # type: List[str]
        self.indents = []       # type: List
        self.definitions = {}   # type: Dict[str, Tuple[str, int, int]]

    def add_definition(self, name: str, entry: Tuple[str, int, int]) -> None:
        """Register the location *entry* for *name*.

        A function defined directly inside another function is not recorded.
        """
        inside_function = bool(self.indents) and self.indents[-1][0] == 'def'
        if inside_function and entry[0] == 'def':
            return  # ignore definition of inner function
        self.definitions[name] = entry

    def parse(self) -> None:
        """Scan all tokens and collect the location of definitions."""
        while True:
            token = self.fetch_token()
            if token is None:
                return

            if token == COMMENT:
                continue

            if token == [OP, '@']:
                before = self.previous
                if before is None or before.match(NEWLINE, NL, INDENT, DEDENT):
                    # '@' at the beginning of a statement: a decorator.
                    # Only the first one of a decorator stack is remembered.
                    if self.decorator is None:
                        self.decorator = token
                    continue

            if token.kind == NAME and token.value in ('class', 'def'):
                self.parse_definition(token.value)
            elif token == INDENT:
                self.indents.append(('other', None, None))
            elif token == DEDENT:
                self.finalize_block()

    def parse_definition(self, typ: str) -> None:
        """Handle a ``class``/``def`` header; the keyword is the current token."""
        name = self.fetch_token()
        self.context.append(name.value)
        funcname = '.'.join(self.context)

        # a definition starts at its first decorator, if it has one
        decorator, self.decorator = self.decorator, None
        start_pos = (decorator or name).start[0]

        self.fetch_until([OP, ':'])
        if not self.fetch_token().match(COMMENT, NEWLINE):
            # one-liner
            self.add_definition(funcname, (typ, start_pos, name.end[0]))
            self.context.pop()
            return

        self.fetch_until(INDENT)
        self.indents.append((typ, funcname, start_pos))

    def finalize_block(self) -> None:
        """Close the innermost indented block; the current token is a DEDENT."""
        typ, funcname, start_pos = self.indents.pop()
        if typ == 'other':
            return

        # step back over trailing blank and comment-only lines
        end_pos = self.current.end[0] - 1
        while emptyline_re.match(self.get_line(end_pos)):
            end_pos -= 1

        self.add_definition(funcname, (typ, start_pos, end_pos))
        self.context.pop()
538
539
class Parser:
    """Front end that runs both analyses over a piece of Python source.

    It wraps ``VariableCommentPicker`` (comments, annotations, definition
    order, finals, overloads) and ``DefinitionFinder`` (definition locations)
    and exposes their results as attributes.
    """

    def __init__(self, code: str, encoding: str = 'utf-8') -> None:
        self.code = filter_whitespace(code)
        self.encoding = encoding
        self.annotations = {}       # type: Dict[Tuple[str, str], str]
        self.comments = {}          # type: Dict[Tuple[str, str], str]
        self.deforders = {}         # type: Dict[str, int]
        self.definitions = {}       # type: Dict[str, Tuple[str, int, int]]
        self.finals = []            # type: List[str]
        self.overloads = {}         # type: Dict[str, List[Signature]]

    def parse(self) -> None:
        """Run the comment analysis, then the definition analysis."""
        self.parse_comments()
        self.parse_definition()

    def parse_comments(self) -> None:
        """Walk the AST of the code and take over the picker's results."""
        tree = parse(self.code)
        source_lines = self.code.splitlines(keepends=True)
        picker = VariableCommentPicker(source_lines, self.encoding)
        picker.visit(tree)
        for attr in ('annotations', 'comments', 'deforders', 'finals', 'overloads'):
            setattr(self, attr, getattr(picker, attr))

    def parse_definition(self) -> None:
        """Tokenize the code and take over the located definitions."""
        source_lines = self.code.splitlines(keepends=True)
        finder = DefinitionFinder(source_lines)
        finder.parse()
        self.definitions = finder.definitions
577