import io
import os
import re
import abc
import csv
import sys
import email
import pathlib
import zipfile
import operator
import functools
import itertools
import posixpath
import collections

from configparser import ConfigParser
from contextlib import suppress
from importlib import import_module
from importlib.abc import MetaPathFinder
from itertools import starmap


__all__ = [
    'Distribution',
    'DistributionFinder',
    'PackageNotFoundError',
    'distribution',
    'distributions',
    'entry_points',
    'files',
    'metadata',
    'requires',
    'version',
    ]


class PackageNotFoundError(ModuleNotFoundError):
    """The package was not found."""


class EntryPoint(
        collections.namedtuple('EntryPointBase', 'name value group')):
    """An entry point as defined by Python packaging conventions.

    See `the packaging docs on entry points
    <https://packaging.python.org/specifications/entry-points/>`_
    for more information.
    """

    pattern = re.compile(
        r'(?P<module>[\w.]+)\s*'
        r'(:\s*(?P<attr>[\w.]+))?\s*'
        r'(?P<extras>\[.*\])?\s*$'
        )
    """
    A regular expression describing the syntax for an entry point,
    which might look like:

        - module
        - package.module
        - package.module:attribute
        - package.module:object.attribute
        - package.module:attr [extra1, extra2]

    Other combinations are possible as well.

    The expression is lenient about whitespace around the ':',
    following the attr, and following any extras.
    """

    def load(self):
        """Load the entry point from its definition. If only a module
        is indicated by the value, return that module. Otherwise,
        return the named object.
        """
        match = self.pattern.match(self.value)
        module = import_module(match.group('module'))
        # A dotted attr ("obj.attr") is resolved one getattr at a time;
        # filter(None, ...) drops the empty piece produced when no attr
        # was given.
        attrs = filter(None, (match.group('attr') or '').split('.'))
        return functools.reduce(getattr, attrs, module)

    @property
    def extras(self):
        """Return the extras named in the value as a list of strings.

        The list is empty when the value has no ``[...]`` section.
        """
        match = self.pattern.match(self.value)
        # findall yields the matched names themselves; finditer would
        # hand callers re.Match objects instead of strings.
        return re.findall(r'\w+', match.group('extras') or '')

    @classmethod
    def _from_config(cls, config):
        """Build one EntryPoint per option of every section in *config*.

        Each section name becomes the entry point's group.
        """
        return [
            cls(name, value, group)
            for group in config.sections()
            for name, value in config.items(group)
            ]

    @classmethod
    def _from_text(cls, text):
        """Parse ini-style *text* and return the list of EntryPoints."""
        config = ConfigParser(delimiters='=')
        # case sensitive: https://stackoverflow.com/q/1611799/812183
        config.optionxform = str
        try:
            config.read_string(text)
        except AttributeError:  # pragma: nocover
            # Python 2 has no read_string
            config.readfp(io.StringIO(text))
        return EntryPoint._from_config(config)

    def __iter__(self):
        """
        Supply iter so one may construct dicts of EntryPoints easily.
        """
        return iter((self.name, self))

    def __reduce__(self):
        """Pickle as a call to the class with (name, value, group)."""
        return (
            self.__class__,
            (self.name, self.value, self.group),
            )


class PackagePath(pathlib.PurePosixPath):
    """A reference to a path in a package"""

    def read_text(self, encoding='utf-8'):
        """Return the located file's contents decoded with *encoding*."""
        with self.locate().open(encoding=encoding) as stream:
            return stream.read()

    def read_binary(self):
        """Return the located file's contents as bytes."""
        with self.locate().open('rb') as stream:
            return stream.read()

    def locate(self):
        """Return a path-like object for this path"""
        return self.dist.locate_file(self)


class FileHash:
    """A file hash parsed from a ``mode=value`` spec string."""

    def __init__(self, spec):
        # Everything before the first '=' is the mode; the rest is the
        # value (both are empty-safe thanks to str.partition).
        self.mode, _, self.value = spec.partition('=')

    def __repr__(self):
        return '<FileHash mode: {} value: {}>'.format(self.mode, self.value)


class Distribution:
    """A Python distribution package."""

    @abc.abstractmethod
    def read_text(self, filename):
        """Attempt to load metadata file given by the name.

        :param filename: The name of the file in the distribution info.
        :return: The text if found, otherwise None.
        """

    @abc.abstractmethod
    def locate_file(self, path):
        """
        Given a path to a file in this distribution, return a path
        to it.
        """

    @classmethod
    def from_name(cls, name):
        """Return the Distribution for the given package name.

        :param name: The name of the distribution package to search for.
        :return: The Distribution instance (or subclass thereof) for the named
                 package, if found.
        :raises PackageNotFoundError: When the named package's distribution
                 metadata cannot be found.
        """
        for resolver in cls._discover_resolvers():
            dists = resolver(DistributionFinder.Context(name=name))
            # Resolvers are only required to return an iterable, so wrap
            # the result in iter() before asking for its first element.
            dist = next(iter(dists), None)
            if dist is not None:
                return dist
        else:
            raise PackageNotFoundError(name)

    @classmethod
    def discover(cls, **kwargs):
        """Return an iterable of Distribution objects for all packages.

        Pass a ``context`` or pass keyword arguments for constructing
        a context.

        :context: A ``DistributionFinder.Context`` object.
        :return: Iterable of Distribution objects for all packages.
        :raises ValueError: When both a context and other keyword
                 arguments are supplied.
        """
        context = kwargs.pop('context', None)
        if context and kwargs:
            raise ValueError("cannot accept context and kwargs")
        context = context or DistributionFinder.Context(**kwargs)
        return itertools.chain.from_iterable(
            resolver(context)
            for resolver in cls._discover_resolvers()
            )

    @staticmethod
    def at(path):
        """Return a Distribution for the indicated metadata path

        :param path: a string or path-like object
        :return: a concrete Distribution instance for the path
        """
        return PathDistribution(pathlib.Path(path))

    @staticmethod
    def _discover_resolvers():
        """Search the meta_path for resolvers."""
        declared = (
            getattr(finder, 'find_distributions', None)
            for finder in sys.meta_path
            )
        # Finders without a find_distributions attribute yield None and
        # are dropped here.
        return filter(None, declared)

    @property
    def metadata(self):
        """Return the parsed metadata for this Distribution.

        The returned object will have keys that name the various bits of
        metadata.  See PEP 566 for details.
        """
        text = (
            self.read_text('METADATA')
            or self.read_text('PKG-INFO')
            # This last clause is here to support old egg-info files.  Its
            # effect is to just end up using the PathDistribution's self._path
            # (which points to the egg-info file) attribute unchanged.
            or self.read_text('')
            )
        return email.message_from_string(text)

    @property
    def version(self):
        """Return the 'Version' metadata for the distribution package."""
        return self.metadata['Version']

    @property
    def entry_points(self):
        """Return the EntryPoints parsed from ``entry_points.txt``."""
        return EntryPoint._from_text(self.read_text('entry_points.txt'))

    @property
    def files(self):
        """Files in this distribution.

        :return: List of PackagePath for this distribution or None

        Result is `None` if the metadata file that enumerates files
        (i.e. RECORD for dist-info or SOURCES.txt for egg-info) is
        missing.
        Result may be empty if the metadata exists but is empty.
        """
        file_lines = self._read_files_distinfo() or self._read_files_egginfo()

        def make_file(name, file_hash=None, size_str=None):
            # One CSV row -> one PackagePath annotated with its hash,
            # size and owning distribution.
            result = PackagePath(name)
            result.hash = FileHash(file_hash) if file_hash else None
            result.size = int(size_str) if size_str else None
            result.dist = self
            return result

        return file_lines and list(starmap(make_file, csv.reader(file_lines)))

    def _read_files_distinfo(self):
        """
        Read the lines of RECORD
        """
        text = self.read_text('RECORD')
        return text and text.splitlines()

    def _read_files_egginfo(self):
        """
        SOURCES.txt might contain literal commas, so wrap each line
        in quotes.
        """
        text = self.read_text('SOURCES.txt')
        return text and map('"{}"'.format, text.splitlines())

    @property
    def requires(self):
        """Generated requirements specified for this Distribution"""
        reqs = self._read_dist_info_reqs() or self._read_egg_info_reqs()
        return reqs and list(reqs)

    def _read_dist_info_reqs(self):
        """Return every 'Requires-Dist' value from the metadata."""
        return self.metadata.get_all('Requires-Dist')

    def _read_egg_info_reqs(self):
        """Return requirements derived from ``requires.txt``, if present."""
        source = self.read_text('requires.txt')
        return source and self._deps_from_requires_text(source)

    @classmethod
    def _deps_from_requires_text(cls, source):
        """Convert the text of a ``requires.txt`` into requirement strings.

        Consecutive lines sharing a section are grouped, then each
        section is converted by _convert_egg_info_reqs_to_simple_reqs.
        """
        section_pairs = cls._read_sections(source.splitlines())
        sections = {
            section: list(map(operator.itemgetter('line'), results))
            for section, results in
            itertools.groupby(section_pairs, operator.itemgetter('section'))
            }
        return cls._convert_egg_info_reqs_to_simple_reqs(sections)

    @staticmethod
    def _read_sections(lines):
        """Yield a ``{'section': ..., 'line': ...}`` dict per content line.

        Empty lines are skipped.  A ``[name]`` line switches the current
        section (initially None) and is not itself yielded.
        """
        section = None
        for line in filter(None, lines):
            section_match = re.match(r'\[(.*)\]$', line)
            if section_match:
                section = section_match.group(1)
                continue
            # Yield a fresh dict each time rather than locals(), so every
            # item stays valid after the generator advances and carries
            # only the two keys consumers read.
            yield {'section': section, 'line': line}

    @staticmethod
    def _convert_egg_info_reqs_to_simple_reqs(sections):
        """
        Historically, setuptools would solicit and store 'extra'
        requirements, including those with environment markers,
        in separate sections. More modern tools expect each
        dependency to be defined separately, with any relevant
        extras and environment markers attached directly to that
        requirement. This method converts the former to the
        latter. See _test_deps_from_requires_text for an example.
        """
        def make_condition(name):
            return name and 'extra == "{name}"'.format(name=name)

        def parse_condition(section):
            section = section or ''
            extra, sep, markers = section.partition(':')
            if extra and markers:
                # Parenthesize the markers so they bind as one unit when
                # joined with the extra condition by 'and'.
                markers = '({markers})'.format(markers=markers)
            conditions = list(filter(None, [markers, make_condition(extra)]))
            return '; ' + ' and '.join(conditions) if conditions else ''

        for section, deps in sections.items():
            for dep in deps:
                yield dep + parse_condition(section)


class DistributionFinder(MetaPathFinder):
    """
    A MetaPathFinder capable of discovering installed distributions.
    """

    class Context:
        """
        Keyword arguments presented by the caller to
        ``distributions()`` or ``Distribution.discover()``
        to narrow the scope of a search for distributions
        in all DistributionFinders.

        Each DistributionFinder may expect any parameters
        and should attempt to honor the canonical
        parameters defined below when appropriate.
        """

        name = None
        """
        Specific name for which a distribution finder should match.
        A name of ``None`` matches all distributions.
        """

        def __init__(self, **kwargs):
            # Store every keyword argument as an instance attribute.
            vars(self).update(kwargs)

        @property
        def path(self):
            """
            The path that a distribution finder should search.

            Typically refers to Python package paths and defaults
            to ``sys.path``.
            """
            return vars(self).get('path', sys.path)

    @abc.abstractmethod
    def find_distributions(self, context=Context()):
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching the ``context``,
        a DistributionFinder.Context instance.
        """


class FastPath:
    """
    Micro-optimized class for searching a path for
    children.
    """

    def __init__(self, root):
        self.root = root
        self.base = os.path.basename(root).lower()

    def joinpath(self, child):
        """Return a path for *child* under this root."""
        return pathlib.Path(self.root, child)

    def children(self):
        """Return the names directly under the root.

        The root is first listed as a directory, then tried as a zip
        file; any failure in either attempt is suppressed and an empty
        list is returned when both fail.
        """
        with suppress(Exception):
            # An empty root (a '' entry on the search path) stands for
            # the current directory; os.listdir('') would just fail.
            return os.listdir(self.root or '.')
        with suppress(Exception):
            return self.zip_children()
        return []

    def zip_children(self):
        """Return the top-level names inside the zip file at the root."""
        zip_path = zipfile.Path(self.root)
        names = zip_path.root.namelist()
        # From here on, children are resolved inside the zip file.
        self.joinpath = zip_path.joinpath

        # dict.fromkeys de-duplicates while keeping first-seen order.
        return dict.fromkeys(
            child.split(posixpath.sep, 1)[0]
            for child in names
            )

    def is_egg(self, search):
        """Return whether the root's basename names an egg for *search*."""
        base = self.base
        return (
            base == search.versionless_egg_name
            or base.startswith(search.prefix)
            and base.endswith('.egg'))

    def search(self, name):
        """Yield paths of children that match the prepared *name*."""
        for child in self.children():
            n_low = child.lower()
            if (n_low in name.exact_matches
                    or n_low.startswith(name.prefix)
                    and n_low.endswith(name.suffixes)
                    # legacy case:
                    or self.is_egg(name) and n_low == 'egg-info'):
                yield self.joinpath(child)


class Prepared:
    """
    A prepared search for metadata on a possibly-named package.
    """
    normalized = ''
    prefix = ''
    suffixes = '.dist-info', '.egg-info'
    exact_matches = [''][:0]
    versionless_egg_name = ''

    def __init__(self, name):
        self.name = name
        if name is None:
            # Keep the class-level defaults: the empty prefix matches
            # every child that has one of the suffixes.
            return
        self.normalized = name.lower().replace('-', '_')
        self.prefix = self.normalized + '-'
        self.exact_matches = [
            self.normalized + suffix for suffix in self.suffixes]
        self.versionless_egg_name = self.normalized + '.egg'


class MetadataPathFinder(DistributionFinder):
    """A DistributionFinder that searches ``context.path`` entries."""

    @classmethod
    def find_distributions(cls, context=DistributionFinder.Context()):
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching ``context.name``
        (or all names if ``None`` indicated) along the paths in the list
        of directories ``context.path``.
        """
        found = cls._search_paths(context.name, context.path)
        return map(PathDistribution, found)

    @classmethod
    def _search_paths(cls, name, paths):
        """Find metadata directories in paths heuristically."""
        return itertools.chain.from_iterable(
            path.search(Prepared(name))
            for path in map(FastPath, paths)
            )


class PathDistribution(Distribution):
    """A Distribution backed by a metadata directory path."""

    def __init__(self, path):
        """Construct a distribution from a path to the metadata directory.

        :param path: A pathlib.Path or similar object supporting
                     .joinpath(), __div__, .parent, and .read_text().
        """
        self._path = path

    def read_text(self, filename):
        # Any of the listed errors means the file is unavailable; the
        # method then falls through and returns None.
        with suppress(FileNotFoundError, IsADirectoryError, KeyError,
                      NotADirectoryError, PermissionError):
            return self._path.joinpath(filename).read_text(encoding='utf-8')
    read_text.__doc__ = Distribution.read_text.__doc__

    def locate_file(self, path):
        """Return *path* resolved against the metadata directory's parent."""
        return self._path.parent / path


def distribution(distribution_name):
    """Get the ``Distribution`` instance for the named package.

    :param distribution_name: The name of the distribution package as a string.
    :return: A ``Distribution`` instance (or subclass thereof).
    """
    return Distribution.from_name(distribution_name)


def distributions(**kwargs):
    """Get all ``Distribution`` instances in the current environment.

    :return: An iterable of ``Distribution`` instances.
    """
    return Distribution.discover(**kwargs)


def metadata(distribution_name):
    """Get the metadata for the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: An email.Message containing the parsed metadata.
    """
    return Distribution.from_name(distribution_name).metadata


def version(distribution_name):
    """Get the version string for the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The version string for the package as defined in the package's
        "Version" metadata key.
    """
    return distribution(distribution_name).version


def entry_points():
    """Return EntryPoint objects for all installed packages.

    :return: A dict mapping each group name to a tuple of the
        EntryPoint objects in that group.
    """
    eps = itertools.chain.from_iterable(
        dist.entry_points for dist in distributions())
    by_group = operator.attrgetter('group')
    # groupby only merges adjacent items, so sort by group first.
    ordered = sorted(eps, key=by_group)
    grouped = itertools.groupby(ordered, by_group)
    return {
        group: tuple(eps)
        for group, eps in grouped
        }


def files(distribution_name):
    """Return a list of files for the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: List of files composing the distribution.
    """
    return distribution(distribution_name).files


def requires(distribution_name):
    """
    Return a list of requirements for the named package.

    :return: An iterator of requirements, suitable for
    packaging.requirement.Requirement.
    """
    return distribution(distribution_name).requires