"""
    sphinx.pycode
    ~~~~~~~~~~~~~

    Utilities parsing and analyzing Python code.

    :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import re
import tokenize
import warnings
from collections import OrderedDict
from importlib import import_module
from inspect import Signature
from io import StringIO
from os import path
from typing import IO, Any, Dict, List, Optional, Tuple
from zipfile import ZipFile

from sphinx.deprecation import RemovedInSphinx40Warning, RemovedInSphinx50Warning
from sphinx.errors import PycodeError
from sphinx.pycode.parser import Parser


class ModuleAnalyzer:
    """Load the source code of a module and expose the results of parsing it.

    Instances are normally obtained through the alternate constructors
    :meth:`for_string`, :meth:`for_file`, :meth:`for_egg` and
    :meth:`for_module`.  The source is read eagerly in ``__init__``; the
    parse itself is deferred until :meth:`analyze` is called.
    """

    # cache for analyzer objects -- caches both by module and file name
    # (keys are ``('file', filename)`` or ``('module', modname)``; a module
    # entry may also hold the PycodeError raised while looking it up)
    cache = {}  # type: Dict[Tuple[str, str], Any]

    @staticmethod
    def get_module_source(modname: str) -> Tuple[Optional[str], Optional[str]]:
        """Try to find the source code for a module.

        Returns ('filename', 'source'). One of it can be None if
        no filename or source found

        :raises PycodeError: if the module cannot be imported, no filename
            can be determined, the file is not a Python source file, or the
            source file does not exist.
        """
        try:
            mod = import_module(modname)
        except Exception as err:
            raise PycodeError('error importing %r' % modname, err) from err
        loader = getattr(mod, '__loader__', None)
        filename = getattr(mod, '__file__', None)
        if loader and getattr(loader, 'get_source', None):
            # prefer Native loader, as it respects #coding directive
            try:
                source = loader.get_source(modname)
                if source:
                    # no exception and not None - it must be module source
                    return filename, source
            except ImportError:
                pass  # Try other "source-mining" methods
        if filename is None and loader and getattr(loader, 'get_filename', None):
            # have loader, but no filename
            try:
                filename = loader.get_filename(modname)
            except ImportError as err:
                raise PycodeError('error getting filename for %r' % modname, err) from err
        if filename is None:
            # all methods for getting filename failed, so raise...
            raise PycodeError('no source found for module %r' % modname)
        filename = path.normpath(path.abspath(filename))
        if filename.lower().endswith(('.pyo', '.pyc')):
            # map the compiled file name back to its source (.py, else .pyw)
            filename = filename[:-1]
            if not path.isfile(filename) and path.isfile(filename + 'w'):
                filename += 'w'
        elif not filename.lower().endswith(('.py', '.pyw')):
            raise PycodeError('source is not a .py file: %r' % filename)
        elif ('.egg' + path.sep) in filename:
            # the file lives inside an egg: accept it as long as the egg
            # itself is a regular file, since the member is not on disk
            pat = '(?<=\\.egg)' + re.escape(path.sep)
            # keyword form: positional ``maxsplit`` is deprecated in re.split
            eggpath, _ = re.split(pat, filename, maxsplit=1)
            if path.isfile(eggpath):
                return filename, None

        if not path.isfile(filename):
            raise PycodeError('source file is not present: %r' % filename)
        return filename, None

    @classmethod
    def for_string(cls, string: str, modname: str, srcname: str = '<string>'
                   ) -> "ModuleAnalyzer":
        """Create an analyzer for source code given as an already decoded string.

        The result is not stored in :attr:`cache`.
        """
        return cls(StringIO(string), modname, srcname, decoded=True)

    @classmethod
    def for_file(cls, filename: str, modname: str) -> "ModuleAnalyzer":
        """Create (or fetch from the cache) an analyzer for a source file.

        If the file cannot be opened and its path contains an ``.egg``
        component, the source is read from the egg archive instead.

        :raises PycodeError: if the file cannot be read.
        """
        if ('file', filename) in cls.cache:
            return cls.cache['file', filename]
        try:
            # tokenize.open() decodes the file using its detected encoding
            with tokenize.open(filename) as f:
                obj = cls(f, modname, filename, decoded=True)
                cls.cache['file', filename] = obj
        except Exception as err:
            if '.egg' + path.sep in filename:
                obj = cls.cache['file', filename] = cls.for_egg(filename, modname)
            else:
                raise PycodeError('error opening %r' % filename, err) from err
        return obj

    @classmethod
    def for_egg(cls, filename: str, modname: str) -> "ModuleAnalyzer":
        """Create an analyzer for a source file stored inside an egg archive.

        *filename* is split at the first ``.egg`` path component into the
        archive path and the member path inside the archive.

        :raises PycodeError: if *filename* has no ``.egg`` component or the
            member cannot be read from the archive.
        """
        pat = '(?<=\\.egg)' + re.escape(path.sep)
        try:
            # Split only once (as get_module_source() does) so that a member
            # path which itself contains ".egg<sep>" stays intact.  The split
            # is inside the ``try`` so a malformed path surfaces as the
            # PycodeError callers expect instead of a bare ValueError.
            eggpath, relpath = re.split(pat, filename, maxsplit=1)
            with ZipFile(eggpath) as egg:
                # ZIP member names always use "/" regardless of the OS
                code = egg.read(relpath.replace(path.sep, '/')).decode()
            return cls.for_string(code, modname, filename)
        except Exception as exc:
            raise PycodeError('error opening %r' % filename, exc) from exc

    @classmethod
    def for_module(cls, modname: str) -> "ModuleAnalyzer":
        """Create (or fetch from the cache) an analyzer for an importable module.

        Failures are cached as well: a lookup that raised
        :exc:`PycodeError` re-raises the same error on later calls.

        :raises PycodeError: if the module source cannot be located or read.
        """
        if ('module', modname) in cls.cache:
            entry = cls.cache['module', modname]
            if isinstance(entry, PycodeError):
                raise entry
            return entry

        try:
            filename, source = cls.get_module_source(modname)
            if source is not None:
                obj = cls.for_string(source, modname, filename or '<string>')
            elif filename is not None:
                obj = cls.for_file(filename, modname)
        except PycodeError as err:
            cls.cache['module', modname] = err
            raise
        cls.cache['module', modname] = obj
        return obj

    def __init__(self, source: IO, modname: str, srcname: str, decoded: bool = False) -> None:
        """Read the whole of *source* and keep it for later analysis.

        :param source: file-like object positioned at the start of the code
        :param modname: name of the module
        :param srcname: name of the source file
        :param decoded: whether *source* yields text; passing ``False``
            (a byte stream) is deprecated and emits a warning
        """
        self.modname = modname  # name of the module
        self.srcname = srcname  # name of the source file

        # cache the source code as well
        pos = source.tell()
        if not decoded:
            warnings.warn('decode option for ModuleAnalyzer is deprecated.',
                          RemovedInSphinx40Warning, stacklevel=2)
            # detect_encoding() consumes the first lines, so rewind afterwards
            self._encoding, _ = tokenize.detect_encoding(source.readline)
            source.seek(pos)
            self.code = source.read().decode(self._encoding)
        else:
            self._encoding = None
            self.code = source.read()

        # will be filled by analyze()
        self.annotations = None  # type: Dict[Tuple[str, str], str]
        self.attr_docs = None    # type: Dict[Tuple[str, str], List[str]]
        self.finals = None       # type: List[str]
        self.overloads = None    # type: Dict[str, List[Signature]]
        self.tagorder = None     # type: Dict[str, int]
        self.tags = None         # type: Dict[str, Tuple[str, int, int]]
        self._analyzed = False

    def parse(self) -> None:
        """Parse the source code.

        Deprecated alias of :meth:`analyze`; emits a deprecation warning.
        """
        warnings.warn('ModuleAnalyzer.parse() is deprecated.',
                      RemovedInSphinx50Warning, stacklevel=2)
        self.analyze()

    def analyze(self) -> None:
        """Analyze the source code.

        The work is done only once; later calls return immediately.

        :raises PycodeError: if parsing fails for any reason.
        """
        if self._analyzed:
            return None

        try:
            parser = Parser(self.code, self._encoding)
            parser.parse()

            self.attr_docs = OrderedDict()
            for (scope, comment) in parser.comments.items():
                # every entry ends with an empty line, even an empty comment
                if comment:
                    self.attr_docs[scope] = comment.splitlines() + ['']
                else:
                    self.attr_docs[scope] = ['']

            self.annotations = parser.annotations
            self.finals = parser.finals
            self.overloads = parser.overloads
            self.tags = parser.definitions
            self.tagorder = parser.deforders
            self._analyzed = True
        except Exception as exc:
            raise PycodeError('parsing %r failed: %r' % (self.srcname, exc)) from exc

    def find_attr_docs(self) -> Dict[Tuple[str, str], List[str]]:
        """Find class and module-level attributes and their documentation."""
        self.analyze()
        return self.attr_docs

    def find_tags(self) -> Dict[str, Tuple[str, int, int]]:
        """Find class, function and method definitions and their location."""
        self.analyze()
        return self.tags

    @property
    def encoding(self) -> str:
        """Encoding detected for the source (deprecated).

        Holds ``None`` when the analyzer was created from decoded text.
        """
        warnings.warn('ModuleAnalyzer.encoding is deprecated.',
                      RemovedInSphinx40Warning, stacklevel=2)
        return self._encoding