1from __future__ import unicode_literals, absolute_import
2
3import io
4import os
5import re
6import abc
7import csv
8import sys
9import zipp
10import operator
11import functools
12import itertools
13import collections
14
15from ._compat import (
16    install,
17    NullFinder,
18    ConfigParser,
19    suppress,
20    map,
21    FileNotFoundError,
22    IsADirectoryError,
23    NotADirectoryError,
24    PermissionError,
25    pathlib,
26    PYPY_OPEN_BUG,
27    ModuleNotFoundError,
28    MetaPathFinder,
29    email_message_from_string,
30    ensure_is_path,
31    PyPy_repr,
32    )
33from importlib import import_module
34from itertools import starmap
35
36
# Module-level ``__metaclass__``: under Python 2 this makes the bare
# ``class Name:`` definitions in this module new-style classes.
__metaclass__ = type


# Public API: the names exported by ``from <this module> import *``.
__all__ = [
    'Distribution',
    'DistributionFinder',
    'PackageNotFoundError',
    'distribution',
    'distributions',
    'entry_points',
    'files',
    'metadata',
    'requires',
    'version',
    ]
52
53
class PackageNotFoundError(ModuleNotFoundError):
    """Raised when no distribution metadata is found for a package."""
56
57
class EntryPoint(
        PyPy_repr,
        collections.namedtuple('EntryPointBase', 'name value group')):
    """An entry point as defined by Python packaging conventions.

    Each instance carries three fields: ``name``, ``value`` (the
    object reference, parsed with ``pattern``) and ``group``.

    See `the packaging docs on entry points
    <https://packaging.python.org/specifications/entry-points/>`_
    for more information.
    """

    pattern = re.compile(
        r'(?P<module>[\w.]+)\s*'
        r'(:\s*(?P<attr>[\w.]+))?\s*'
        r'(?P<extras>\[.*\])?\s*$'
        )
    """
    A regular expression describing the syntax for an entry point,
    which might look like:

        - module
        - package.module
        - package.module:attribute
        - package.module:object.attribute
        - package.module:attr [extra1, extra2]

    Other combinations are possible as well.

    The expression is lenient about whitespace around the ':',
    following the attr, and following any extras.
    """

    def load(self):
        """Load the entry point from its definition. If only a module
        is indicated by the value, return that module. Otherwise,
        return the named object.

        :return: The imported module, or the object reached by walking
            the dotted ``attr`` path from that module.
        """
        match = self.pattern.match(self.value)
        module = import_module(match.group('module'))
        # A missing attr yields [''], which filter(None, ...) reduces to
        # nothing, so the reduce below returns the module itself.
        attrs = filter(None, (match.group('attr') or '').split('.'))
        return functools.reduce(getattr, attrs, module)

    @property
    def extras(self):
        """Return the names of the extras declared in the value.

        :return: A list of strings, one per word found inside the
            square brackets; empty when no extras are declared.
        """
        match = self.pattern.match(self.value)
        # findall returns the matched strings themselves; finditer would
        # hand callers match objects rather than the extra names.
        return re.findall(r'\w+', match.group('extras') or '')

    @classmethod
    def _from_config(cls, config):
        """Build entry points from a parsed config object.

        :param config: An object offering ``sections()`` and
            ``items(section)``; each section is used as a group.
        :return: A list with one instance per (group, name, value).
        """
        return [
            cls(name, value, group)
            for group in config.sections()
            for name, value in config.items(group)
            ]

    @classmethod
    def _from_text(cls, text):
        """Parse ini-style text into a list of entry points.

        :param text: The text to parse, with ``=`` as the only
            key/value delimiter.
        :return: A list of ``EntryPoint`` instances.
        """
        config = ConfigParser(delimiters='=')
        # case sensitive: https://stackoverflow.com/q/1611799/812183
        config.optionxform = str
        try:
            config.read_string(text)
        except AttributeError:  # pragma: nocover
            # Python 2 has no read_string
            config.readfp(io.StringIO(text))
        return EntryPoint._from_config(config)

    def __iter__(self):
        """
        Supply iter so one may construct dicts of EntryPoints easily.

        Iteration yields ``name`` and then the entry point itself, not
        the three tuple fields.
        """
        return iter((self.name, self))

    def __reduce__(self):
        """Describe reconstruction as a call to the class with the
        three fields passed explicitly.
        """
        return (
            self.__class__,
            (self.name, self.value, self.group),
            )
135
136
class PackagePath(pathlib.PurePosixPath):
    """A pure POSIX path naming a file that belongs to a package.

    ``locate`` reads a ``dist`` attribute, which must be assigned on
    the instance before any of the methods here are used.
    """

    def read_text(self, encoding='utf-8'):
        """Return the located file's contents decoded with *encoding*."""
        target = self.locate()
        with target.open(encoding=encoding) as handle:
            contents = handle.read()
        return contents

    def read_binary(self):
        """Return the located file's contents as bytes."""
        target = self.locate()
        with target.open('rb') as handle:
            payload = handle.read()
        return payload

    def locate(self):
        """Return a path-like object for this path"""
        owner = self.dist
        return owner.locate_file(self)
151
152
class FileHash:
    """A file hash parsed from a ``mode=value`` spec string."""

    def __init__(self, spec):
        # partition() returns (head, separator, tail); only the text on
        # either side of the first '=' is kept.
        pieces = spec.partition('=')
        self.mode = pieces[0]
        self.value = pieces[2]

    def __repr__(self):
        template = '<FileHash mode: {} value: {}>'
        return template.format(self.mode, self.value)
159
160
class Distribution:
    """A Python distribution package."""

    @abc.abstractmethod
    def read_text(self, filename):
        """Attempt to load metadata file given by the name.

        :param filename: The name of the file in the distribution info.
        :return: The text if found, otherwise None.
        """

    @abc.abstractmethod
    def locate_file(self, path):
        """
        Given a path to a file in this distribution, return a path
        to it.
        """

    @classmethod
    def from_name(cls, name):
        """Return the Distribution for the given package name.

        :param name: The name of the distribution package to search for.
        :return: The Distribution instance (or subclass thereof) for the named
            package, if found.
        :raises PackageNotFoundError: When the named package's distribution
            metadata cannot be found.
        """
        for resolver in cls._discover_resolvers():
            dists = resolver(DistributionFinder.Context(name=name))
            # Only the first distribution a resolver reports is used.
            dist = next(dists, None)
            if dist is not None:
                return dist
        else:
            raise PackageNotFoundError(name)

    @classmethod
    def discover(cls, **kwargs):
        """Return an iterable of Distribution objects for all packages.

        Pass a ``context`` or pass keyword arguments for constructing
        a context.

        :context: A ``DistributionFinder.Context`` object.
        :return: Iterable of Distribution objects for all packages.
        :raises ValueError: When both ``context`` and other keyword
            arguments are supplied.
        """
        context = kwargs.pop('context', None)
        if context and kwargs:
            raise ValueError("cannot accept context and kwargs")
        context = context or DistributionFinder.Context(**kwargs)
        return itertools.chain.from_iterable(
            resolver(context)
            for resolver in cls._discover_resolvers()
            )

    @staticmethod
    def at(path):
        """Return a Distribution for the indicated metadata path

        :param path: a string or path-like object
        :return: a concrete Distribution instance for the path
        """
        return PathDistribution(ensure_is_path(path))

    @staticmethod
    def _discover_resolvers():
        """Search the meta_path for resolvers.

        :return: An iterable of the truthy ``find_distributions``
            attributes found on the entries of ``sys.meta_path``.
        """
        declared = (
            getattr(finder, 'find_distributions', None)
            for finder in sys.meta_path
            )
        return filter(None, declared)

    @property
    def metadata(self):
        """Return the parsed metadata for this Distribution.

        The returned object will have keys that name the various bits of
        metadata.  See PEP 566 for details.
        """
        text = (
            self.read_text('METADATA')
            or self.read_text('PKG-INFO')
            # This last clause is here to support old egg-info files.  Its
            # effect is to just end up using the PathDistribution's self._path
            # (which points to the egg-info file) attribute unchanged.
            or self.read_text('')
            )
        return email_message_from_string(text)

    @property
    def version(self):
        """Return the 'Version' metadata for the distribution package."""
        return self.metadata['Version']

    @property
    def entry_points(self):
        """Return the entry points parsed from ``entry_points.txt``."""
        return EntryPoint._from_text(self.read_text('entry_points.txt'))

    @property
    def files(self):
        """Files in this distribution.

        :return: List of PackagePath for this distribution or None

        Result is `None` if the metadata file that enumerates files
        (i.e. RECORD for dist-info or SOURCES.txt for egg-info) is
        missing.
        Result may be empty if the metadata exists but is empty.
        """
        file_lines = self._read_files_distinfo() or self._read_files_egginfo()

        def make_file(name, hash=None, size_str=None):
            # Each parsed CSV row supplies up to three columns: the
            # path, a hash spec and a size; the last two may be absent
            # or empty.
            result = PackagePath(name)
            result.hash = FileHash(hash) if hash else None
            result.size = int(size_str) if size_str else None
            result.dist = self
            return result

        return file_lines and list(starmap(make_file, csv.reader(file_lines)))

    def _read_files_distinfo(self):
        """
        Read the lines of RECORD
        """
        text = self.read_text('RECORD')
        return text and text.splitlines()

    def _read_files_egginfo(self):
        """
        SOURCES.txt might contain literal commas, so wrap each line
        in quotes.
        """
        text = self.read_text('SOURCES.txt')
        return text and map('"{}"'.format, text.splitlines())

    @property
    def requires(self):
        """Generated requirements specified for this Distribution

        :return: A list of requirement strings, or a falsy value when
            neither metadata source yields any requirements.
        """
        reqs = self._read_dist_info_reqs() or self._read_egg_info_reqs()
        return reqs and list(reqs)

    def _read_dist_info_reqs(self):
        """Return every ``Requires-Dist`` value from the metadata."""
        return self.metadata.get_all('Requires-Dist')

    def _read_egg_info_reqs(self):
        """Return requirements derived from ``requires.txt``, if present."""
        source = self.read_text('requires.txt')
        return source and self._deps_from_requires_text(source)

    @classmethod
    def _deps_from_requires_text(cls, source):
        """Convert the text of a ``requires.txt`` into requirement strings.

        :param source: The full text of the file.
        :return: An iterator of requirement strings.
        """
        section_pairs = cls._read_sections(source.splitlines())
        sections = {
            section: list(map(operator.itemgetter('line'), results))
            for section, results in
            itertools.groupby(section_pairs, operator.itemgetter('section'))
            }
        return cls._convert_egg_info_reqs_to_simple_reqs(sections)

    @staticmethod
    def _read_sections(lines):
        """Pair each content line with the section header it follows.

        :param lines: An iterable of text lines; empty lines are skipped.
        :return: A generator of dicts with the keys ``'section'`` (the
            text inside the most recent ``[...]`` header, or None before
            any header is seen) and ``'line'``.
        """
        section = None
        for line in filter(None, lines):
            section_match = re.match(r'\[(.*)\]$', line)
            if section_match:
                section = section_match.group(1)
                continue
            # Yield a fresh dict each time: handing out locals() can
            # return one shared mapping that is overwritten on the next
            # iteration, corrupting results that have been collected.
            yield {'section': section, 'line': line}

    @staticmethod
    def _convert_egg_info_reqs_to_simple_reqs(sections):
        """
        Historically, setuptools would solicit and store 'extra'
        requirements, including those with environment markers,
        in separate sections. More modern tools expect each
        dependency to be defined separately, with any relevant
        extras and environment markers attached directly to that
        requirement. This method converts the former to the
        latter. See _test_deps_from_requires_text for an example.
        """
        def make_condition(name):
            return name and 'extra == "{name}"'.format(name=name)

        def parse_condition(section):
            # A section header reads "extra:markers"; either side may
            # be empty, and a None section means no condition at all.
            section = section or ''
            extra, sep, markers = section.partition(':')
            if extra and markers:
                markers = '({markers})'.format(markers=markers)
            conditions = list(filter(None, [markers, make_condition(extra)]))
            return '; ' + ' and '.join(conditions) if conditions else ''

        for section, deps in sections.items():
            for dep in deps:
                yield dep + parse_condition(section)
355
356
class DistributionFinder(MetaPathFinder):
    """
    A MetaPathFinder that can additionally discover installed
    distributions.
    """

    class Context:
        """
        Search criteria for ``find_distributions``.

        Every keyword argument given to the constructor is stored as an
        instance attribute.
        """

        # Specific name for which a distribution finder should match;
        # ``None`` makes ``pattern`` match any name.
        name = None

        def __init__(self, **kwargs):
            self.__dict__.update(kwargs)

        @property
        def path(self):
            """
            The path that a distribution finder should search: the
            ``path`` given at construction, otherwise ``sys.path``.
            """
            own = self.__dict__
            if 'path' in own:
                return own['path']
            return sys.path

        @property
        def pattern(self):
            """
            Regular expression source for the sought name: ``'.*'``
            when no name is set, otherwise the escaped name.
            """
            if self.name is None:
                return '.*'
            return re.escape(self.name)

    @abc.abstractmethod
    def find_distributions(self, context=Context()):
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching the ``context``,
        a DistributionFinder.Context instance.
        """
392
393
@install
class MetadataPathFinder(NullFinder, DistributionFinder):
    """A degenerate finder for distribution packages on the file system.

    Only ``find_distributions()`` is supplied here, for versions of
    Python that do not have a PathFinder ``find_distributions()``.
    """

    def find_distributions(self, context=DistributionFinder.Context()):
        """
        Find distributions.

        Return an iterable of all Distribution instances capable of
        loading the metadata for packages matching ``context.name``
        (or all names if ``None`` indicated) along the paths in the list
        of directories ``context.path``.
        """
        metadata_paths = self._search_paths(context.pattern, context.path)
        return map(PathDistribution, metadata_paths)

    @classmethod
    def _search_paths(cls, pattern, paths):
        """Find metadata directories in paths heuristically."""
        per_root = (
            cls._search_path(root, pattern)
            for root in map(cls._switch_path, paths)
            )
        return itertools.chain.from_iterable(per_root)

    @staticmethod
    def _switch_path(path):
        """Wrap *path* in ``zipp.Path`` when possible, else ``pathlib.Path``."""
        if PYPY_OPEN_BUG and not os.path.isfile(path):  # pragma: no branch
            return pathlib.Path(path)
        # Any failure constructing the zip path is deliberately ignored
        # and the plain file-system path is used instead.
        with suppress(Exception):
            return zipp.Path(path)
        return pathlib.Path(path)

    @classmethod
    def _matches_info(cls, normalized, item):
        """Match the item's name against ``<pattern>[-...].dist-info`` or
        ``.egg-info``, ignoring case; return the match object or None.
        """
        manifest = r'{pattern}(-.*)?\.(dist|egg)-info'.format(
            pattern=normalized)
        return re.match(manifest, item.name, flags=re.IGNORECASE)

    @classmethod
    def _matches_legacy(cls, normalized, item):
        """Search the item's full path for ``<pattern>-....egg/EGG-INFO``,
        ignoring case; return the match object or None.
        """
        manifest = r'{pattern}-.*\.egg[\\/]EGG-INFO'.format(
            pattern=normalized)
        return re.search(manifest, str(item), flags=re.IGNORECASE)

    @classmethod
    def _search_path(cls, root, pattern):
        """Return the entries directly under *root* that look like metadata.

        Yields nothing when *root* is not a directory. Dashes in the
        pattern are replaced by underscores before matching.
        """
        if root.is_dir():
            normalized = pattern.replace('-', '_')
            return (
                entry for entry in root.iterdir()
                if cls._matches_info(normalized, entry)
                or cls._matches_legacy(normalized, entry)
                )
        return ()
449
450
class PathDistribution(Distribution):
    """A distribution whose metadata lives under a path-like object."""

    def __init__(self, path):
        """Construct a distribution from a path to the metadata directory.

        :param path: A pathlib.Path or similar object supporting
                     .joinpath(), __div__, .parent, and .read_text().
        """
        self._path = path

    def read_text(self, filename):
        # These errors mean the file cannot be read; the method then
        # returns None rather than propagating them.
        ignorable = (
            FileNotFoundError,
            IsADirectoryError,
            KeyError,
            NotADirectoryError,
            PermissionError,
            )
        with suppress(*ignorable):
            target = self._path.joinpath(filename)
            return target.read_text(encoding='utf-8')
        return None
    read_text.__doc__ = Distribution.read_text.__doc__

    def locate_file(self, path):
        """Resolve *path* against the parent of the metadata path."""
        base = self._path.parent
        return base / path
468
469
def distribution(distribution_name):
    """Look up the ``Distribution`` instance for the named package.

    Delegates to ``Distribution.from_name``.

    :param distribution_name: The name of the distribution package as a string.
    :return: A ``Distribution`` instance (or subclass thereof).
    """
    found = Distribution.from_name(distribution_name)
    return found
477
478
def distributions(**kwargs):
    """Get all ``Distribution`` instances in the current environment.

    :param kwargs: Keyword arguments forwarded unchanged to
        ``Distribution.discover``.
    :return: An iterable of ``Distribution`` instances.
    """
    discovered = Distribution.discover(**kwargs)
    return discovered
485
486
def metadata(distribution_name):
    """Return the parsed metadata of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: An email.Message containing the parsed metadata.
    """
    dist = Distribution.from_name(distribution_name)
    return dist.metadata
494
495
def version(distribution_name):
    """Return the version string of the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The version string for the package as defined in the package's
        "Version" metadata key.
    """
    dist = distribution(distribution_name)
    return dist.version
504
505
def entry_points():
    """Collect the EntryPoint objects of all installed packages.

    :return: A dict mapping each group name to a tuple of the
        EntryPoint objects declared in that group.
    """
    by_group = operator.attrgetter('group')
    # groupby only merges adjacent items, so sort by group first.
    ordered = sorted(
        itertools.chain.from_iterable(
            dist.entry_points for dist in distributions()),
        key=by_group,
        )
    result = {}
    for group, members in itertools.groupby(ordered, by_group):
        result[group] = tuple(members)
    return result
520
521
def files(distribution_name):
    """Return the ``files`` value of the named package's distribution.

    :param distribution_name: The name of the distribution package to query.
    :return: List of files composing the distribution.
    """
    dist = distribution(distribution_name)
    return dist.files
529
530
def requires(distribution_name):
    """
    Return the requirements declared by the named package.

    :param distribution_name: The name of the distribution package to query.
    :return: The ``requires`` value of the package's distribution: a
        list of requirements, suitable for
        packaging.requirement.Requirement, or a falsy value when
        none are found.
    """
    dist = distribution(distribution_name)
    return dist.requires
539
540
# Resolved at import time by looking up the 'Version' metadata of the
# distribution named after this module; version() raises
# PackageNotFoundError when no such distribution is found.
__version__ = version(__name__)
542