1"""
2    sphinx.pycode
3    ~~~~~~~~~~~~~
4
    Utilities for parsing and analyzing Python code.
6
7    :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
8    :license: BSD, see LICENSE for details.
9"""
10
11import re
12import tokenize
13import warnings
14from collections import OrderedDict
15from importlib import import_module
16from inspect import Signature
17from io import StringIO
18from os import path
19from typing import IO, Any, Dict, List, Optional, Tuple
20from zipfile import ZipFile
21
22from sphinx.deprecation import RemovedInSphinx40Warning, RemovedInSphinx50Warning
23from sphinx.errors import PycodeError
24from sphinx.pycode.parser import Parser
25
26
27class ModuleAnalyzer:
28    # cache for analyzer objects -- caches both by module and file name
29    cache = {}  # type: Dict[Tuple[str, str], Any]
30
31    @staticmethod
32    def get_module_source(modname: str) -> Tuple[Optional[str], Optional[str]]:
33        """Try to find the source code for a module.
34
35        Returns ('filename', 'source'). One of it can be None if
36        no filename or source found
37        """
38        try:
39            mod = import_module(modname)
40        except Exception as err:
41            raise PycodeError('error importing %r' % modname, err) from err
42        loader = getattr(mod, '__loader__', None)
43        filename = getattr(mod, '__file__', None)
44        if loader and getattr(loader, 'get_source', None):
45            # prefer Native loader, as it respects #coding directive
46            try:
47                source = loader.get_source(modname)
48                if source:
49                    # no exception and not None - it must be module source
50                    return filename, source
51            except ImportError:
52                pass  # Try other "source-mining" methods
53        if filename is None and loader and getattr(loader, 'get_filename', None):
54            # have loader, but no filename
55            try:
56                filename = loader.get_filename(modname)
57            except ImportError as err:
58                raise PycodeError('error getting filename for %r' % modname, err) from err
59        if filename is None:
60            # all methods for getting filename failed, so raise...
61            raise PycodeError('no source found for module %r' % modname)
62        filename = path.normpath(path.abspath(filename))
63        if filename.lower().endswith(('.pyo', '.pyc')):
64            filename = filename[:-1]
65            if not path.isfile(filename) and path.isfile(filename + 'w'):
66                filename += 'w'
67        elif not filename.lower().endswith(('.py', '.pyw')):
68            raise PycodeError('source is not a .py file: %r' % filename)
69        elif ('.egg' + path.sep) in filename:
70            pat = '(?<=\\.egg)' + re.escape(path.sep)
71            eggpath, _ = re.split(pat, filename, 1)
72            if path.isfile(eggpath):
73                return filename, None
74
75        if not path.isfile(filename):
76            raise PycodeError('source file is not present: %r' % filename)
77        return filename, None
78
79    @classmethod
80    def for_string(cls, string: str, modname: str, srcname: str = '<string>'
81                   ) -> "ModuleAnalyzer":
82        return cls(StringIO(string), modname, srcname, decoded=True)
83
84    @classmethod
85    def for_file(cls, filename: str, modname: str) -> "ModuleAnalyzer":
86        if ('file', filename) in cls.cache:
87            return cls.cache['file', filename]
88        try:
89            with tokenize.open(filename) as f:
90                obj = cls(f, modname, filename, decoded=True)
91                cls.cache['file', filename] = obj
92        except Exception as err:
93            if '.egg' + path.sep in filename:
94                obj = cls.cache['file', filename] = cls.for_egg(filename, modname)
95            else:
96                raise PycodeError('error opening %r' % filename, err) from err
97        return obj
98
99    @classmethod
100    def for_egg(cls, filename: str, modname: str) -> "ModuleAnalyzer":
101        SEP = re.escape(path.sep)
102        eggpath, relpath = re.split('(?<=\\.egg)' + SEP, filename)
103        try:
104            with ZipFile(eggpath) as egg:
105                code = egg.read(relpath).decode()
106                return cls.for_string(code, modname, filename)
107        except Exception as exc:
108            raise PycodeError('error opening %r' % filename, exc) from exc
109
110    @classmethod
111    def for_module(cls, modname: str) -> "ModuleAnalyzer":
112        if ('module', modname) in cls.cache:
113            entry = cls.cache['module', modname]
114            if isinstance(entry, PycodeError):
115                raise entry
116            return entry
117
118        try:
119            filename, source = cls.get_module_source(modname)
120            if source is not None:
121                obj = cls.for_string(source, modname, filename or '<string>')
122            elif filename is not None:
123                obj = cls.for_file(filename, modname)
124        except PycodeError as err:
125            cls.cache['module', modname] = err
126            raise
127        cls.cache['module', modname] = obj
128        return obj
129
130    def __init__(self, source: IO, modname: str, srcname: str, decoded: bool = False) -> None:
131        self.modname = modname  # name of the module
132        self.srcname = srcname  # name of the source file
133
134        # cache the source code as well
135        pos = source.tell()
136        if not decoded:
137            warnings.warn('decode option for ModuleAnalyzer is deprecated.',
138                          RemovedInSphinx40Warning, stacklevel=2)
139            self._encoding, _ = tokenize.detect_encoding(source.readline)
140            source.seek(pos)
141            self.code = source.read().decode(self._encoding)
142        else:
143            self._encoding = None
144            self.code = source.read()
145
146        # will be filled by analyze()
147        self.annotations = None  # type: Dict[Tuple[str, str], str]
148        self.attr_docs = None    # type: Dict[Tuple[str, str], List[str]]
149        self.finals = None       # type: List[str]
150        self.overloads = None    # type: Dict[str, List[Signature]]
151        self.tagorder = None     # type: Dict[str, int]
152        self.tags = None         # type: Dict[str, Tuple[str, int, int]]
153        self._analyzed = False
154
155    def parse(self) -> None:
156        """Parse the source code."""
157        warnings.warn('ModuleAnalyzer.parse() is deprecated.',
158                      RemovedInSphinx50Warning, stacklevel=2)
159        self.analyze()
160
161    def analyze(self) -> None:
162        """Analyze the source code."""
163        if self._analyzed:
164            return None
165
166        try:
167            parser = Parser(self.code, self._encoding)
168            parser.parse()
169
170            self.attr_docs = OrderedDict()
171            for (scope, comment) in parser.comments.items():
172                if comment:
173                    self.attr_docs[scope] = comment.splitlines() + ['']
174                else:
175                    self.attr_docs[scope] = ['']
176
177            self.annotations = parser.annotations
178            self.finals = parser.finals
179            self.overloads = parser.overloads
180            self.tags = parser.definitions
181            self.tagorder = parser.deforders
182            self._analyzed = True
183        except Exception as exc:
184            raise PycodeError('parsing %r failed: %r' % (self.srcname, exc)) from exc
185
186    def find_attr_docs(self) -> Dict[Tuple[str, str], List[str]]:
187        """Find class and module-level attributes and their documentation."""
188        self.analyze()
189        return self.attr_docs
190
191    def find_tags(self) -> Dict[str, Tuple[str, int, int]]:
192        """Find class, function and method definitions and their location."""
193        self.analyze()
194        return self.tags
195
196    @property
197    def encoding(self) -> str:
198        warnings.warn('ModuleAnalyzer.encoding is deprecated.',
199                      RemovedInSphinx40Warning, stacklevel=2)
200        return self._encoding
201