1from __future__ import unicode_literals, absolute_import
2
3import io
4import os
5import re
6import abc
7import csv
8import sys
9import zipp
10import operator
11import functools
12import itertools
13import posixpath
14import collections
15
16from ._compat import (
17    install,
18    NullFinder,
19    ConfigParser,
20    suppress,
21    map,
22    FileNotFoundError,
23    IsADirectoryError,
24    NotADirectoryError,
25    PermissionError,
26    pathlib,
27    ModuleNotFoundError,
28    MetaPathFinder,
29    email_message_from_string,
30    PyPy_repr,
31    unique_ordered,
32    str,
33    )
34from importlib import import_module
35from itertools import starmap
36
37
# Python 2 compatibility: classes declared below without explicit bases
# become new-style classes. The assignment has no effect on Python 3.
__metaclass__ = type


# Public API of this module: the names exported by a star-import.
__all__ = [
    'Distribution',
    'DistributionFinder',
    'PackageNotFoundError',
    'distribution',
    'distributions',
    'entry_points',
    'files',
    'metadata',
    'requires',
    'version',
    ]
53
54
class PackageNotFoundError(ModuleNotFoundError):
    """Raised when no metadata can be located for a distribution.

    The exception is expected to be constructed with exactly one
    positional argument: the name of the distribution looked up.
    """

    @property
    def name(self):
        """The distribution name this exception was raised for."""
        # Unpacking enforces the single-argument contract.
        (only_arg,) = self.args
        return only_arg

    def __str__(self):
        message = "No package metadata was found for {self.name}"
        return message.format(self=self)
66
67
class EntryPoint(
        PyPy_repr,
        collections.namedtuple('EntryPointBase', 'name value group')):
    """An entry point as defined by Python packaging conventions.

    See `the packaging docs on entry points
    <https://packaging.python.org/specifications/entry-points/>`_
    for more information.

    Instances carry three fields: ``name``, ``value`` (the
    ``module[:attr] [extras]`` reference) and ``group``.
    """

    pattern = re.compile(
        r'(?P<module>[\w.]+)\s*'
        r'(:\s*(?P<attr>[\w.]+))?\s*'
        r'(?P<extras>\[.*\])?\s*$'
        )
    """
    A regular expression describing the syntax for an entry point,
    which might look like:

        - module
        - package.module
        - package.module:attribute
        - package.module:object.attribute
        - package.module:attr [extra1, extra2]

    Other combinations are possible as well.

    The expression is lenient about whitespace around the ':',
    following the attr, and following any extras.
    """

    def load(self):
        """Load the entry point from its definition. If only a module
        is indicated by the value, return that module. Otherwise,
        return the named object.

        Assumes ``self.value`` matches ``pattern``.
        """
        match = self.pattern.match(self.value)
        module = import_module(match.group('module'))
        # A dotted attr ("obj.attribute") is resolved one getattr at a
        # time; a missing attr yields no names and returns the module.
        attrs = filter(None, (match.group('attr') or '').split('.'))
        return functools.reduce(getattr, attrs, module)

    @property
    def module(self):
        """The module part of the value (text before any ':')."""
        match = self.pattern.match(self.value)
        return match.group('module')

    @property
    def attr(self):
        """The attribute part of the value, or None when absent."""
        match = self.pattern.match(self.value)
        return match.group('attr')

    @property
    def extras(self):
        """The extras named in the value, as a list of strings.

        ``'pkg.mod:attr [extra1, extra2]'`` gives
        ``['extra1', 'extra2']``; a value without a bracketed extras
        section gives an empty list.
        """
        match = self.pattern.match(self.value)
        # findall returns the matched words themselves; finditer would
        # hand callers re.Match objects rather than the extra names.
        return re.findall(r'\w+', match.group('extras') or '')

    @classmethod
    def _from_config(cls, config):
        """Build one entry point per option of every section of *config*.

        The section name becomes the entry point's group.
        """
        return [
            cls(name, value, group)
            for group in config.sections()
            for name, value in config.items(group)
            ]

    @classmethod
    def _from_text(cls, text):
        """Parse INI-style entry point text into a list of entry points."""
        config = ConfigParser(delimiters='=')
        # case sensitive: https://stackoverflow.com/q/1611799/812183
        config.optionxform = str
        try:
            config.read_string(text)
        except AttributeError:  # pragma: nocover
            # Python 2 has no read_string
            config.readfp(io.StringIO(text))
        return EntryPoint._from_config(config)

    def __iter__(self):
        """
        Supply iter so one may construct dicts of EntryPoints easily.

        Iteration yields ``name`` and then the entry point itself, so
        ``dict(entry_points)`` maps names to entry points.
        """
        return iter((self.name, self))

    def __reduce__(self):
        """Describe how to rebuild this entry point from its three fields.

        The fields are spelled out because ``__iter__`` above does not
        yield them.
        """
        return (
            self.__class__,
            (self.name, self.value, self.group),
            )
155
156
class PackagePath(pathlib.PurePosixPath):
    """A POSIX-style path naming a file that belongs to a distribution.

    The methods below rely on a ``dist`` attribute having been set on
    the instance; it must provide ``locate_file``.
    """

    def locate(self):
        """Return a path-like object for this path.

        The lookup is delegated to ``self.dist.locate_file``.
        """
        owner = self.dist
        return owner.locate_file(self)

    def read_text(self, encoding='utf-8'):
        """Return the located file's content decoded with *encoding*."""
        target = self.locate()
        with target.open(encoding=encoding) as handle:
            content = handle.read()
        return content

    def read_binary(self):
        """Return the located file's content as bytes."""
        target = self.locate()
        with target.open('rb') as handle:
            content = handle.read()
        return content
171
172
class FileHash:
    """A file hash parsed from a ``mode=value`` specification."""

    def __init__(self, spec):
        # Split on the first '=' only. Without an '=' the whole spec
        # becomes the mode and the value is the empty string.
        pieces = spec.partition('=')
        self.mode = pieces[0]
        self.value = pieces[2]

    def __repr__(self):
        template = '<FileHash mode: {} value: {}>'
        return template.format(self.mode, self.value)
179
180
class Distribution:
    """A Python distribution package.

    Subclasses supply ``read_text`` and ``locate_file``; the remaining
    accessors are derived from the metadata text ``read_text`` returns.
    """

    @abc.abstractmethod
    def read_text(self, filename):
        """Attempt to load metadata file given by the name.

        :param filename: The name of the file in the distribution info.
        :return: The text if found, otherwise None.
        """

    @abc.abstractmethod
    def locate_file(self, path):
        """
        Given a path to a file in this distribution, return a path
        to it.
        """

    @classmethod
    def from_name(cls, name):
        """Return the Distribution for the given package name.

        :param name: The name of the distribution package to search for.
        :return: The Distribution instance (or subclass thereof) for the named
            package, if found.
        :raises PackageNotFoundError: When the named package's distribution
            metadata cannot be found.
        """
        for resolver in cls._discover_resolvers():
            dists = resolver(DistributionFinder.Context(name=name))
            # Only the first match of each resolver is considered.
            dist = next(iter(dists), None)
            if dist is not None:
                return dist
        # No resolver produced a distribution.
        raise PackageNotFoundError(name)

    @classmethod
    def discover(cls, **kwargs):
        """Return an iterable of Distribution objects for all packages.

        Pass a ``context`` or pass keyword arguments for constructing
        a context.

        :context: A ``DistributionFinder.Context`` object.
        :return: Iterable of Distribution objects for all packages.
        :raises ValueError: When both a ``context`` and other keyword
            arguments are supplied.
        """
        context = kwargs.pop('context', None)
        if context and kwargs:
            raise ValueError("cannot accept context and kwargs")
        context = context or DistributionFinder.Context(**kwargs)
        # Results of all resolvers are chained lazily.
        return itertools.chain.from_iterable(
            resolver(context)
            for resolver in cls._discover_resolvers()
            )

    @staticmethod
    def at(path):
        """Return a Distribution for the indicated metadata path

        :param path: a string or path-like object
        :return: a concrete Distribution instance for the path
        """
        return PathDistribution(pathlib.Path(path))

    @staticmethod
    def _discover_resolvers():
        """Search the meta_path for resolvers.

        :return: An iterable of the ``find_distributions`` attributes of
            the finders on ``sys.meta_path``; finders without one (or
            with a falsy one) are skipped.
        """
        declared = (
            getattr(finder, 'find_distributions', None)
            for finder in sys.meta_path
            )
        return filter(None, declared)

    @classmethod
    def _local(cls, root='.'):
        """Return a PathDistribution built from the project at *root*.

        Uses ``pep517`` (imported lazily, so it is only needed when
        this method is called).
        """
        # NOTE(review): presumably builds the project's metadata into a
        # zip archive; confirm against the pep517 API.
        from pep517 import build, meta
        system = build.compat_system(root)
        builder = functools.partial(
            meta.build,
            source_dir=root,
            system=system,
            )
        return PathDistribution(zipp.Path(meta.build_as_zip(builder)))

    @property
    def metadata(self):
        """Return the parsed metadata for this Distribution.

        The returned object will have keys that name the various bits of
        metadata.  See PEP 566 for details.
        """
        text = (
            self.read_text('METADATA')
            or self.read_text('PKG-INFO')
            # This last clause is here to support old egg-info files.  Its
            # effect is to just end up using the PathDistribution's self._path
            # (which points to the egg-info file) attribute unchanged.
            or self.read_text('')
            )
        return email_message_from_string(text)

    @property
    def version(self):
        """Return the 'Version' metadata for the distribution package."""
        return self.metadata['Version']

    @property
    def entry_points(self):
        """Entry points parsed from this distribution's entry_points.txt."""
        return EntryPoint._from_text(self.read_text('entry_points.txt'))

    @property
    def files(self):
        """Files in this distribution.

        :return: List of PackagePath for this distribution, or a falsy
            value when no file listing is available.

        The listing is read from RECORD (dist-info), falling back to
        SOURCES.txt (egg-info). When neither yields any text, the
        falsy value produced by the readers (``None`` for a missing
        file, ``''`` for an empty one) is returned instead of a list.

        Each PackagePath gets ``hash`` (a FileHash or None), ``size``
        (an int or None) and ``dist`` (this distribution) attributes.
        """
        file_lines = self._read_files_distinfo() or self._read_files_egginfo()

        def make_file(name, hash=None, size_str=None):
            # One CSV row: path, optional hash spec, optional size.
            result = PackagePath(name)
            result.hash = FileHash(hash) if hash else None
            result.size = int(size_str) if size_str else None
            result.dist = self
            return result

        return file_lines and list(starmap(make_file, csv.reader(file_lines)))

    def _read_files_distinfo(self):
        """
        Read the lines of RECORD

        Returns the falsy text itself (``None`` or ``''``) when there
        is nothing to split.
        """
        text = self.read_text('RECORD')
        return text and text.splitlines()

    def _read_files_egginfo(self):
        """
        SOURCES.txt might contain literal commas, so wrap each line
        in quotes.

        Returns the falsy text itself (``None`` or ``''``) when there
        is nothing to wrap.
        """
        text = self.read_text('SOURCES.txt')
        return text and map('"{}"'.format, text.splitlines())

    @property
    def requires(self):
        """Generated requirements specified for this Distribution

        :return: A list of requirement strings taken from the
            ``Requires-Dist`` metadata, falling back to requires.txt;
            a falsy value when neither source provides any.
        """
        reqs = self._read_dist_info_reqs() or self._read_egg_info_reqs()
        return reqs and list(reqs)

    def _read_dist_info_reqs(self):
        """Return every ``Requires-Dist`` value from the metadata."""
        return self.metadata.get_all('Requires-Dist')

    def _read_egg_info_reqs(self):
        """Return requirements derived from requires.txt, if present."""
        source = self.read_text('requires.txt')
        return source and self._deps_from_requires_text(source)

    @classmethod
    def _deps_from_requires_text(cls, source):
        """Convert the text of a requires.txt into requirement strings.

        :param source: The full text of a requires.txt file.
        :return: An iterator of requirement strings.
        """
        section_pairs = cls._read_sections(source.splitlines())
        # groupby only merges adjacent pairs; pairs of one section are
        # adjacent because they are produced in file order.
        sections = {
            section: list(map(operator.itemgetter('line'), results))
            for section, results in
            itertools.groupby(section_pairs, operator.itemgetter('section'))
            }
        return cls._convert_egg_info_reqs_to_simple_reqs(sections)

    @staticmethod
    def _read_sections(lines):
        """Pair every requirement line with the section it appears in.

        :param lines: An iterable of text lines; empty lines are skipped.
        :return: A generator of dicts with the keys ``'section'`` (the
            text inside the most recent ``[...]`` header, or None
            before the first header) and ``'line'``.
        """
        section = None
        for line in filter(None, lines):
            section_match = re.match(r'\[(.*)\]$', line)
            if section_match:
                section = section_match.group(1)
                continue
            # A fresh dict per pair: yielding ``locals()`` hands out one
            # shared mapping on interpreters where repeated ``locals()``
            # calls return the same dict, so collected pairs would all
            # show the last line.
            yield {'section': section, 'line': line}

    @staticmethod
    def _convert_egg_info_reqs_to_simple_reqs(sections):
        """
        Historically, setuptools would solicit and store 'extra'
        requirements, including those with environment markers,
        in separate sections. More modern tools expect each
        dependency to be defined separately, with any relevant
        extras and environment markers attached directly to that
        requirement. This method converts the former to the
        latter. See _test_deps_from_requires_text for an example.

        :param sections: A mapping of section name (``extra``,
            ``:markers``, ``extra:markers`` or None) to the list of
            requirement lines found in that section.
        :return: A generator of requirement strings.
        """
        def make_condition(name):
            # An empty extra name produces no condition.
            return name and 'extra == "{name}"'.format(name=name)

        def parse_condition(section):
            section = section or ''
            extra, sep, markers = section.partition(':')
            if extra and markers:
                # Parenthesize so the markers bind before the 'and'.
                markers = '({markers})'.format(markers=markers)
            conditions = list(filter(None, [markers, make_condition(extra)]))
            return '; ' + ' and '.join(conditions) if conditions else ''

        for section, deps in sections.items():
            for dep in deps:
                yield dep + parse_condition(section)
386
387
class DistributionFinder(MetaPathFinder):
    """
    A meta path finder that can also discover installed distributions.
    """

    class Context:
        """
        The search parameters a caller hands to ``distributions()`` or
        ``Distribution.discover()`` in order to narrow the search
        performed by every DistributionFinder.

        A finder may look for any parameters it likes, and should
        honor the canonical ones defined here when appropriate.
        """

        # Specific name for which a distribution finder should match.
        # A name of ``None`` matches all distributions.
        name = None

        def __init__(self, **kwargs):
            # Keywords go straight into the instance dict, so a 'path'
            # keyword is stored without going through the read-only
            # ``path`` property below.
            self.__dict__.update(kwargs)

        @property
        def path(self):
            """
            The path a distribution finder should search.

            Typically refers to Python package paths; falls back to
            ``sys.path`` when no ``path`` keyword was supplied.
            """
            try:
                return self.__dict__['path']
            except KeyError:
                return sys.path

    @abc.abstractmethod
    def find_distributions(self, context=Context()):
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching the ``context``,
        a DistributionFinder.Context instance.
        """
433
434
class FastPath:
    """
    Micro-optimized class for searching a path for
    children.

    ``root`` is one search-path entry, either a directory or (as a
    fallback) a zip archive; ``base`` is its lower-cased basename.
    """

    def __init__(self, root):
        self.root = str(root)
        self.base = os.path.basename(self.root).lower()

    def joinpath(self, child):
        """Return the path of *child* under the root directory."""
        return pathlib.Path(self.root, child)

    def children(self):
        """Return the names directly under the root.

        The root is first listed as a directory, then treated as a zip
        archive. Any error in either attempt is suppressed on purpose
        (best effort), and an empty list is returned when both fail.
        """
        with suppress(Exception):
            # An empty root stands for the current directory. It must be
            # listed as '.', because os.listdir('') raises instead.
            return os.listdir(self.root or '.')
        with suppress(Exception):
            return self.zip_children()
        return []

    def zip_children(self):
        """Return the unique top-level names inside the root zip archive.

        Side effect: ``self.joinpath`` is rebound to the zip path's
        ``joinpath`` so that ``search`` yields paths inside the archive.
        """
        zip_path = zipp.Path(self.root)
        names = zip_path.root.namelist()
        self.joinpath = zip_path.joinpath

        # Keep only the first path component of every archive member.
        return unique_ordered(
            child.split(posixpath.sep, 1)[0]
            for child in names
            )

    def is_egg(self, search):
        """Tell whether the root itself is an egg matching *search*.

        :param search: An object providing ``versionless_egg_name``
            and ``prefix``.
        """
        base = self.base
        return (
            base == search.versionless_egg_name
            or (base.startswith(search.prefix)
                and base.endswith('.egg')))

    def search(self, name):
        """Yield the path of every child that holds metadata for *name*.

        :param name: An object providing ``exact_matches``, ``prefix``,
            ``suffixes`` and ``versionless_egg_name``.
        """
        for child in self.children():
            n_low = child.lower()
            if (n_low in name.exact_matches
                    or (n_low.replace('.', '_').startswith(name.prefix)
                        and n_low.endswith(name.suffixes))
                    # legacy case:
                    or (self.is_egg(name) and n_low == 'egg-info')):
                yield self.joinpath(child)
481
482
class Prepared:
    """
    The pre-computed search terms for one (possibly absent) package
    name, used when looking for metadata.
    """
    # Class-level defaults; they apply when the name is None.
    normalized = ''
    prefix = ''
    suffixes = ('.dist-info', '.egg-info')
    exact_matches = []
    versionless_egg_name = ''

    def __init__(self, name):
        self.name = name
        if name is not None:
            normalized = self.normalize(name)
            self.normalized = normalized
            self.prefix = normalized + '-'
            self.exact_matches = [
                normalized + suffix
                for suffix in self.suffixes
                ]
            self.versionless_egg_name = normalized + '.egg'

    @staticmethod
    def normalize(name):
        """
        PEP 503 normalization plus dashes as underscores.
        """
        collapsed = re.sub(r"[-_.]+", "-", name)
        lowered = collapsed.lower()
        return lowered.replace('-', '_')
509
510
@install
class MetadataPathFinder(NullFinder, DistributionFinder):
    """A degenerate finder for distribution packages on the file system.

    It provides nothing but ``find_distributions()``, for versions of
    Python whose PathFinder lacks a ``find_distributions()``.
    """

    @classmethod
    def _search_paths(cls, name, paths):
        """Find metadata directories in paths heuristically."""
        # Each path entry is searched lazily, as the result is consumed.
        per_path_hits = (
            FastPath(entry).search(Prepared(name))
            for entry in paths
            )
        return itertools.chain.from_iterable(per_path_hits)

    def find_distributions(self, context=DistributionFinder.Context()):
        """
        Find distributions.

        Return an iterable of Distribution instances able to load the
        metadata of the packages matching ``context.name`` (every
        package when it is ``None``), searching the directories listed
        in ``context.path``.
        """
        metadata_dirs = self._search_paths(context.name, context.path)
        return map(PathDistribution, metadata_dirs)
538
539
class PathDistribution(Distribution):
    """A Distribution whose metadata is read from a path-like object."""

    def __init__(self, path):
        """Construct a distribution from a path to the metadata directory.

        :param path: A pathlib.Path or similar object supporting
                     .joinpath(), __div__, .parent, and .read_text().
        """
        self._path = path

    def read_text(self, filename):
        # A file that is missing or unreadable gives None, not an error.
        try:
            return self._path.joinpath(filename).read_text(encoding='utf-8')
        except (FileNotFoundError, IsADirectoryError, KeyError,
                NotADirectoryError, PermissionError):
            return None
    read_text.__doc__ = Distribution.read_text.__doc__

    def locate_file(self, path):
        """Return *path* resolved against the metadata path's parent."""
        container = self._path.parent
        return container / path
557
558
def distribution(distribution_name):
    """Look up the ``Distribution`` instance of a named package.

    :param distribution_name: The name of the distribution package as a string.
    :return: A ``Distribution`` instance (or subclass thereof).
    :raises PackageNotFoundError: When no metadata is found for the name.
    """
    found = Distribution.from_name(distribution_name)
    return found
566
567
def distributions(**kwargs):
    """Discover the ``Distribution`` instances of the current environment.

    All keyword arguments are forwarded to ``Distribution.discover``.

    :return: An iterable of ``Distribution`` instances.
    """
    discovered = Distribution.discover(**kwargs)
    return discovered
574
575
def metadata(distribution_name):
    """Return the parsed metadata of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: An email.Message containing the parsed metadata.
    """
    dist = Distribution.from_name(distribution_name)
    return dist.metadata
583
584
def version(distribution_name):
    """Return the version string of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The version string for the package as defined in the package's
        "Version" metadata key.
    """
    dist = distribution(distribution_name)
    return dist.version
593
594
def entry_points():
    """Collect the EntryPoint objects of every installed package.

    :return: A dict mapping each group name to a tuple of the
        EntryPoint objects in that group, with keys in sorted order.
    """
    group_of = operator.attrgetter('group')
    collected = itertools.chain.from_iterable(
        dist.entry_points for dist in distributions())
    # Sorting first gives sorted keys; the sort is stable, so members
    # of one group keep their discovery order.
    buckets = {}
    for entry_point in sorted(collected, key=group_of):
        buckets.setdefault(group_of(entry_point), []).append(entry_point)
    return {
        group: tuple(members)
        for group, members in buckets.items()
        }
609
610
def files(distribution_name):
    """Return the files recorded for the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The value of the distribution's ``files`` property.
    """
    dist = distribution(distribution_name)
    return dist.files
618
619
def requires(distribution_name):
    """
    Return the requirements of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The value of the distribution's ``requires`` property;
        its entries are suitable for packaging.requirement.Requirement.
    """
    dist = distribution(distribution_name)
    return dist.requires
628