1import io
2import os
3import re
4import abc
5import csv
6import sys
7import email
8import pathlib
9import zipfile
10import operator
11import functools
12import itertools
13import posixpath
14import collections
15
16from configparser import ConfigParser
17from contextlib import suppress
18from importlib import import_module
19from importlib.abc import MetaPathFinder
20from itertools import starmap
21
22
# Public names of this module: the ones exported by a star-import.
__all__ = [
    'Distribution',
    'DistributionFinder',
    'PackageNotFoundError',
    'distribution',
    'distributions',
    'entry_points',
    'files',
    'metadata',
    'requires',
    'version',
    ]
35
36
class PackageNotFoundError(ModuleNotFoundError):
    """Raised when no distribution metadata exists for a requested name."""
39
40
class EntryPoint(
        collections.namedtuple('EntryPointBase', 'name value group')):
    """An entry point as defined by Python packaging conventions.

    An entry point is a ``(name, value, group)`` triple where ``value``
    has the form ``module[:attr] [extras]``.

    See `the packaging docs on entry points
    <https://packaging.python.org/specifications/entry-points/>`_
    for more information.
    """

    pattern = re.compile(
        r'(?P<module>[\w.]+)\s*'
        r'(:\s*(?P<attr>[\w.]+))?\s*'
        r'(?P<extras>\[.*\])?\s*$'
        )
    """
    A regular expression describing the syntax for an entry point,
    which might look like:

        - module
        - package.module
        - package.module:attribute
        - package.module:object.attribute
        - package.module:attr [extra1, extra2]

    Other combinations are possible as well.

    The expression is lenient about whitespace around the ':',
    following the attr, and following any extras.
    """

    def load(self):
        """Load the entry point from its definition.

        If only a module is indicated by the value, return that module.
        Otherwise, import the module and resolve the dotted attribute
        path on it, returning the named object.
        """
        match = self.pattern.match(self.value)
        module = import_module(match.group('module'))
        # A missing attr yields [''], which filter(None, ...) reduces to
        # nothing, so the module itself is returned.
        attrs = filter(None, (match.group('attr') or '').split('.'))
        return functools.reduce(getattr, attrs, module)

    @property
    def extras(self):
        """Return the list of extra names declared in the value.

        :return: A list of strings, empty when no ``[...]`` clause is
            present.
        """
        match = self.pattern.match(self.value)
        # findall returns the matched names themselves; finditer would
        # hand back match objects rather than usable strings.
        return re.findall(r'\w+', match.group('extras') or '')

    @classmethod
    def _from_config(cls, config):
        """Build one entry point per option in every section of *config*.

        The section name becomes the entry point's group.
        """
        return [
            cls(name, value, group)
            for group in config.sections()
            for name, value in config.items(group)
            ]

    @classmethod
    def _from_text(cls, text):
        """Parse INI-style ``entry_points.txt`` content into entry points.

        :param text: The text of the entry points file.
        :return: A list of entry points in file order.
        """
        config = ConfigParser(delimiters='=')
        # case sensitive: https://stackoverflow.com/q/1611799/812183
        config.optionxform = str
        config.read_string(text)
        return cls._from_config(config)

    def __iter__(self):
        """
        Supply iter so one may construct dicts of EntryPoints easily.

        Iteration yields the name and then the entry point itself, i.e.
        a ``(key, value)`` pair, rather than the three tuple fields.
        """
        return iter((self.name, self))

    def __reduce__(self):
        """Pickle via the three fields, since ``__iter__`` is overridden."""
        return (
            self.__class__,
            (self.name, self.value, self.group),
            )
117
118
class PackagePath(pathlib.PurePosixPath):
    """A pure POSIX path naming a file that belongs to a distribution.

    The methods below resolve the path through the object stored on the
    ``dist`` attribute, which must be assigned before they are used.
    """

    def locate(self):
        """Ask the owning distribution for a path-like object for this path."""
        owner = self.dist
        return owner.locate_file(self)

    def read_text(self, encoding='utf-8'):
        """Return the located file's content decoded with *encoding*."""
        target = self.locate()
        with target.open(encoding=encoding) as handle:
            text = handle.read()
        return text

    def read_binary(self):
        """Return the located file's content as bytes."""
        target = self.locate()
        with target.open('rb') as handle:
            payload = handle.read()
        return payload
133
134
class FileHash:
    """A hash specification of the form ``mode=value``."""

    def __init__(self, spec):
        # Split on the first '=' only; a spec without '=' leaves value empty.
        mode, _sep, value = spec.partition('=')
        self.mode = mode
        self.value = value

    def __repr__(self):
        template = '<FileHash mode: {} value: {}>'
        return template.format(self.mode, self.value)
141
142
class Distribution:
    """A Python distribution package.

    Subclasses supply ``read_text`` and ``locate_file``; everything else
    (metadata, version, entry points, files, requirements) is derived
    from the metadata files those two methods expose.
    """

    @abc.abstractmethod
    def read_text(self, filename):
        """Attempt to load metadata file given by the name.

        :param filename: The name of the file in the distribution info.
        :return: The text if found, otherwise None.
        """

    @abc.abstractmethod
    def locate_file(self, path):
        """
        Given a path to a file in this distribution, return a path
        to it.
        """

    @classmethod
    def from_name(cls, name):
        """Return the Distribution for the given package name.

        :param name: The name of the distribution package to search for.
        :return: The Distribution instance (or subclass thereof) for the named
            package, if found.
        :raises PackageNotFoundError: When the named package's distribution
            metadata cannot be found.
        """
        for resolver in cls._discover_resolvers():
            dists = resolver(DistributionFinder.Context(name=name))
            # Finders are only required to return an iterable, so obtain
            # an iterator explicitly before asking for the first match.
            dist = next(iter(dists), None)
            if dist is not None:
                return dist
        raise PackageNotFoundError(name)

    @classmethod
    def discover(cls, **kwargs):
        """Return an iterable of Distribution objects for all packages.

        Pass a ``context`` or pass keyword arguments for constructing
        a context.

        :context: A ``DistributionFinder.Context`` object.
        :return: Iterable of Distribution objects for all packages.
        :raises ValueError: When both a context and other keyword
            arguments are supplied.
        """
        context = kwargs.pop('context', None)
        if context and kwargs:
            raise ValueError("cannot accept context and kwargs")
        context = context or DistributionFinder.Context(**kwargs)
        return itertools.chain.from_iterable(
            resolver(context)
            for resolver in cls._discover_resolvers()
            )

    @staticmethod
    def at(path):
        """Return a Distribution for the indicated metadata path

        :param path: a string or path-like object
        :return: a concrete Distribution instance for the path
        """
        return PathDistribution(pathlib.Path(path))

    @staticmethod
    def _discover_resolvers():
        """Search the meta_path for resolvers.

        :return: An iterator over the truthy ``find_distributions``
            attributes of the finders on ``sys.meta_path``; finders
            without one are skipped.
        """
        declared = (
            getattr(finder, 'find_distributions', None)
            for finder in sys.meta_path
            )
        return filter(None, declared)

    @property
    def metadata(self):
        """Return the parsed metadata for this Distribution.

        The returned object will have keys that name the various bits of
        metadata.  See PEP 566 for details.
        """
        text = (
            self.read_text('METADATA')
            or self.read_text('PKG-INFO')
            # This last clause is here to support old egg-info files.  Its
            # effect is to just end up using the PathDistribution's self._path
            # (which points to the egg-info file) attribute unchanged.
            or self.read_text('')
            )
        return email.message_from_string(text)

    @property
    def version(self):
        """Return the 'Version' metadata for the distribution package."""
        return self.metadata['Version']

    @property
    def entry_points(self):
        """Return the entry points parsed from ``entry_points.txt``."""
        return EntryPoint._from_text(self.read_text('entry_points.txt'))

    @property
    def files(self):
        """Files in this distribution.

        :return: List of PackagePath for this distribution, or a falsy
            value when no file listing is available

        The listing is read from RECORD (dist-info) and, failing that,
        from SOURCES.txt (egg-info).  Result is `None` if both are
        missing.
        """
        file_lines = self._read_files_distinfo() or self._read_files_egginfo()

        def make_file(name, hash=None, size_str=None):
            # Each CSV row is (name[, hash[, size]]); absent or empty
            # hash/size columns are recorded as None.
            result = PackagePath(name)
            result.hash = FileHash(hash) if hash else None
            result.size = int(size_str) if size_str else None
            result.dist = self
            return result

        return file_lines and list(starmap(make_file, csv.reader(file_lines)))

    def _read_files_distinfo(self):
        """
        Read the lines of RECORD
        """
        text = self.read_text('RECORD')
        return text and text.splitlines()

    def _read_files_egginfo(self):
        """
        SOURCES.txt might contain literal commas, so wrap each line
        in quotes.
        """
        text = self.read_text('SOURCES.txt')
        return text and map('"{}"'.format, text.splitlines())

    @property
    def requires(self):
        """Generated requirements specified for this Distribution

        :return: A list of requirement strings taken from the
            ``Requires-Dist`` metadata or, failing that, derived from
            ``requires.txt``; a falsy value when neither provides any.
        """
        reqs = self._read_dist_info_reqs() or self._read_egg_info_reqs()
        return reqs and list(reqs)

    def _read_dist_info_reqs(self):
        """Return every ``Requires-Dist`` value from the metadata."""
        return self.metadata.get_all('Requires-Dist')

    def _read_egg_info_reqs(self):
        """Return requirements derived from ``requires.txt``, if present."""
        source = self.read_text('requires.txt')
        return source and self._deps_from_requires_text(source)

    @classmethod
    def _deps_from_requires_text(cls, source):
        """Convert the text of a ``requires.txt`` file into requirements.

        :param source: The content of ``requires.txt``.
        :return: An iterator of requirement strings.
        """
        # Accumulate per section rather than grouping adjacent lines, so a
        # section header that appears more than once contributes all of
        # its requirements instead of only those from its last occurrence.
        sections = {}
        for pair in cls._read_sections(source.splitlines()):
            sections.setdefault(pair['section'], []).append(pair['line'])
        return cls._convert_egg_info_reqs_to_simple_reqs(sections)

    @staticmethod
    def _read_sections(lines):
        """Yield a ``{'section': ..., 'line': ...}`` dict per content line.

        Empty lines are skipped.  A ``[name]`` line starts a new section
        and is not itself yielded; lines before any header have section
        ``None``.
        """
        section = None
        for line in filter(None, lines):
            section_match = re.match(r'\[(.*)\]$', line)
            if section_match:
                section = section_match.group(1)
                continue
            # Yield a fresh dict each time so results stay valid after the
            # generator advances and expose only the two documented keys.
            yield {'section': section, 'line': line}

    @staticmethod
    def _convert_egg_info_reqs_to_simple_reqs(sections):
        """
        Historically, setuptools would solicit and store 'extra'
        requirements, including those with environment markers,
        in separate sections. More modern tools expect each
        dependency to be defined separately, with any relevant
        extras and environment markers attached directly to that
        requirement. This method converts the former to the
        latter. See _test_deps_from_requires_text for an example.

        :param sections: Mapping of section name (``extra``,
            ``extra:markers``, ``:markers`` or ``None``) to the list of
            requirement lines in that section.
        """
        def make_condition(name):
            return name and 'extra == "{name}"'.format(name=name)

        def parse_condition(section):
            section = section or ''
            extra, sep, markers = section.partition(':')
            if extra and markers:
                # Parenthesize so the markers bind before the 'and'.
                markers = '({markers})'.format(markers=markers)
            conditions = list(filter(None, [markers, make_condition(extra)]))
            return ('; ' + ' and '.join(conditions)) if conditions else ''

        for section, deps in sections.items():
            for dep in deps:
                yield dep + parse_condition(section)
337
338
class DistributionFinder(MetaPathFinder):
    """
    Abstract meta path finder that can also report the distributions
    it knows about.
    """

    class Context:
        """
        Search parameters supplied by the caller of ``distributions()``
        or ``Distribution.discover()`` and handed to every
        DistributionFinder.

        Arbitrary keyword arguments are accepted and stored as
        attributes. Finders should honor the canonical parameters
        defined here (``name`` and ``path``) where that makes sense.
        """

        # Name a finder should match; ``None`` (the default) matches
        # every distribution.
        name = None

        def __init__(self, **kwargs):
            # Write straight into the instance dict so that a supplied
            # ``path`` is stored even though ``path`` is a read-only property.
            self.__dict__.update(kwargs)

        @property
        def path(self):
            """
            The list of locations a finder should search.

            This is the ``path`` keyword given at construction, or
            ``sys.path`` when none was given.
            """
            try:
                return self.__dict__['path']
            except KeyError:
                return sys.path

    @abc.abstractmethod
    def find_distributions(self, context=Context()):
        """
        Locate distributions.

        Implementations return an iterable of Distribution instances
        able to load the metadata of the packages that match
        ``context``, which is a DistributionFinder.Context instance.
        """
384
385
386class FastPath:
387    """
388    Micro-optimized class for searching a path for
389    children.
390    """
391
392    def __init__(self, root):
393        self.root = root
394        self.base = os.path.basename(root).lower()
395
396    def joinpath(self, child):
397        return pathlib.Path(self.root, child)
398
399    def children(self):
400        with suppress(Exception):
401            return os.listdir(self.root or '')
402        with suppress(Exception):
403            return self.zip_children()
404        return []
405
406    def zip_children(self):
407        zip_path = zipfile.Path(self.root)
408        names = zip_path.root.namelist()
409        self.joinpath = zip_path.joinpath
410
411        return dict.fromkeys(
412            child.split(posixpath.sep, 1)[0]
413            for child in names
414            )
415
416    def is_egg(self, search):
417        base = self.base
418        return (
419            base == search.versionless_egg_name
420            or base.startswith(search.prefix)
421            and base.endswith('.egg'))
422
423    def search(self, name):
424        for child in self.children():
425            n_low = child.lower()
426            if (n_low in name.exact_matches
427                    or n_low.startswith(name.prefix)
428                    and n_low.endswith(name.suffixes)
429                    # legacy case:
430                    or self.is_egg(name) and n_low == 'egg-info'):
431                yield self.joinpath(child)
432
433
class Prepared:
    """
    The precomputed match criteria for a metadata search on a name,
    or on no particular name when ``None`` is given.
    """
    # Class-level defaults apply when the name is None.
    normalized = ''
    prefix = ''
    suffixes = ('.dist-info', '.egg-info')
    exact_matches = []
    versionless_egg_name = ''

    def __init__(self, name):
        self.name = name
        if name is not None:
            folded = name.lower().replace('-', '_')
            self.normalized = folded
            self.prefix = folded + '-'
            self.exact_matches = [
                folded + ending for ending in self.suffixes]
            self.versionless_egg_name = folded + '.egg'
453
454
class MetadataPathFinder(DistributionFinder):
    """Finder that locates metadata directories along a list of paths."""

    @classmethod
    def find_distributions(cls, context=DistributionFinder.Context()):
        """
        Locate distributions on the context's search path.

        Returns an iterable of PathDistribution objects, one for each
        metadata location found along ``context.path`` that matches
        ``context.name`` (every name when it is ``None``).
        """
        metadata_dirs = cls._search_paths(context.name, context.path)
        return map(PathDistribution, metadata_dirs)

    @classmethod
    def _search_paths(cls, name, paths):
        """Lazily search each entry of *paths* for metadata matching *name*."""
        hits_per_path = (
            FastPath(entry).search(Prepared(name))
            for entry in paths
            )
        return itertools.chain.from_iterable(hits_per_path)
476
477
class PathDistribution(Distribution):
    """A distribution backed by a metadata directory on a path."""

    def __init__(self, path):
        """Create a distribution from the path of its metadata directory.

        :param path: A pathlib.Path or similar object supporting
                     .joinpath(), __div__, .parent, and .read_text().
        """
        self._path = path

    def read_text(self, filename):
        # Files that are missing or unreadable are reported as None.
        try:
            target = self._path.joinpath(filename)
            return target.read_text(encoding='utf-8')
        except (FileNotFoundError, IsADirectoryError, KeyError,
                NotADirectoryError, PermissionError):
            return None
    read_text.__doc__ = Distribution.read_text.__doc__

    def locate_file(self, path):
        """Resolve *path* against the parent of the metadata directory."""
        container = self._path.parent
        return container / path
495
496
def distribution(distribution_name):
    """Look up the ``Distribution`` for a named package.

    :param distribution_name: The name of the distribution package as a string.
    :return: A ``Distribution`` instance (or subclass thereof).
    """
    found = Distribution.from_name(distribution_name)
    return found
504
505
def distributions(**kwargs):
    """Discover the ``Distribution`` instances in the current environment.

    Keyword arguments are forwarded unchanged to
    ``Distribution.discover``.

    :return: An iterable of ``Distribution`` instances.
    """
    discovered = Distribution.discover(**kwargs)
    return discovered
512
513
def metadata(distribution_name):
    """Return the parsed metadata of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: An email.Message containing the parsed metadata.
    """
    dist = Distribution.from_name(distribution_name)
    return dist.metadata
521
522
def version(distribution_name):
    """Return the version string of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The version string for the package as defined in the package's
        "Version" metadata key.
    """
    dist = distribution(distribution_name)
    return dist.version
531
532
def entry_points():
    """Collect the entry points of every discovered distribution.

    :return: A dict mapping each group name to a tuple of the
        EntryPoint objects in that group.
    """
    group_of = operator.attrgetter('group')
    collected = []
    for dist in distributions():
        collected.extend(dist.entry_points)
    # groupby only merges adjacent items, so sort by group first; the
    # sort is stable, which keeps discovery order within a group.
    collected.sort(key=group_of)
    return {
        group_name: tuple(members)
        for group_name, members in itertools.groupby(collected, group_of)
        }
547
548
def files(distribution_name):
    """Return the files recorded for the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The ``files`` value of the package's distribution.
    """
    dist = distribution(distribution_name)
    return dist.files
556
557
def requires(distribution_name):
    """
    Return the requirements of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The ``requires`` value of the package's distribution; its
        entries are requirement strings suitable for
        packaging.requirement.Requirement.
    """
    dist = distribution(distribution_name)
    return dist.requires
566