1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 2012-2017 The Python Software Foundation.
4# See LICENSE.txt and CONTRIBUTORS.txt.
5#
6"""PEP 376 implementation."""
7
8from __future__ import unicode_literals
9
10import base64
11import codecs
12import contextlib
13import hashlib
14import logging
15import os
16import posixpath
17import sys
18import zipimport
19
20from . import DistlibException, resources
21from .compat import StringIO
22from .version import get_scheme, UnsupportedVersionError
23from .metadata import (Metadata, METADATA_FILENAME, WHEEL_METADATA_FILENAME,
24                       LEGACY_METADATA_FILENAME)
25from .util import (parse_requirement, cached_property, parse_name_and_version,
26                   read_exports, write_exports, CSVReader, CSVWriter)
27
28
# Names exported by ``from <this module> import *``.
# NOTE(review): 'EggInfoDistribution' is listed here but is not defined in the
# part of the file reviewed - presumably it is defined further down.
__all__ = ['Distribution', 'BaseInstalledDistribution',
           'InstalledDistribution', 'EggInfoDistribution',
           'DistributionPath']


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Name of the file in a .dist-info directory which holds a distribution's
# exports (see InstalledDistribution.read_exports / write_exports).
EXPORTS_FILENAME = 'pydist-exports.json'
# NOTE(review): not referenced in the part of the file reviewed; presumably
# the name of a per-distribution commands file - confirm before relying on it.
COMMANDS_FILENAME = 'pydist-commands.json'

# The only file names accepted by InstalledDistribution.get_distinfo_resource()
# and InstalledDistribution.get_distinfo_file(); any other name is rejected
# with a DistlibException.
DIST_FILES = ('INSTALLER', METADATA_FILENAME, 'RECORD', 'REQUESTED',
              'RESOURCES', EXPORTS_FILENAME, 'SHARED')

# Suffix which identifies a PEP 376 metadata directory.
DISTINFO_EXT = '.dist-info'
43
44
45class _Cache(object):
46    """
47    A simple cache mapping names and .dist-info paths to distributions
48    """
49    def __init__(self):
50        """
51        Initialise an instance. There is normally one for each DistributionPath.
52        """
53        self.name = {}
54        self.path = {}
55        self.generated = False
56
57    def clear(self):
58        """
59        Clear the cache, setting it to its initial state.
60        """
61        self.name.clear()
62        self.path.clear()
63        self.generated = False
64
65    def add(self, dist):
66        """
67        Add a distribution to the cache.
68        :param dist: The distribution to add.
69        """
70        if dist.path not in self.path:
71            self.path[dist.path] = dist
72            self.name.setdefault(dist.key, []).append(dist)
73
74
class DistributionPath(object):
    """
    Represents a set of distributions installed on a path (typically sys.path).
    """
    def __init__(self, path=None, include_egg=False):
        """
        Create an instance from a path, optionally including legacy (distutils/
        setuptools/distribute) distributions.

        :param path: The path to use, as a list of directories. If not specified,
                     sys.path is used.
        :param include_egg: If True, this instance will look for and return legacy
                            distributions as well as those based on PEP 376.
        """
        if path is None:
            path = sys.path
        self.path = path
        self._include_dist = True
        self._include_egg = include_egg

        # .dist-info and legacy (.egg / .egg-info) distributions are cached
        # separately.
        self._cache = _Cache()
        self._cache_egg = _Cache()
        self._cache_enabled = True
        self._scheme = get_scheme('default')

    def _get_cache_enabled(self):
        return self._cache_enabled

    def _set_cache_enabled(self, value):
        self._cache_enabled = value

    # Whether lookups go through the caches (True) or rescan the path on
    # every call (False).
    cache_enabled = property(_get_cache_enabled, _set_cache_enabled)

    def clear_cache(self):
        """
        Clears the internal cache.
        """
        self._cache.clear()
        self._cache_egg.clear()

    def _yield_distributions(self):
        """
        Yield .dist-info and/or .egg(-info) distributions.

        Every entry of ``self.path`` for which a resource finder is available
        is scanned; its resources are visited in sorted order. The caches are
        neither consulted nor updated here.
        """
        # We need to check if we've seen some resources already, because on
        # some Linux systems (e.g. some Debian/Ubuntu variants) there are
        # symlinks which alias other files in the environment.
        seen = set()
        # Candidate metadata file names, tried in this order for each
        # .dist-info entry.
        possible_filenames = (METADATA_FILENAME,
                              WHEEL_METADATA_FILENAME,
                              LEGACY_METADATA_FILENAME)
        for path in self.path:
            finder = resources.finder_for_path(path)
            if finder is None:
                continue
            root = finder.find('')
            if not root or not root.is_container:
                continue
            for entry in sorted(root.resources):
                r = finder.find(entry)
                if not r or r.path in seen:
                    continue
                if self._include_dist and entry.endswith(DISTINFO_EXT):
                    for metadata_filename in possible_filenames:
                        metadata_path = posixpath.join(entry, metadata_filename)
                        pydist = finder.find(metadata_path)
                        if pydist:
                            break
                    else:
                        # No metadata file at all: not a usable distribution.
                        continue

                    with contextlib.closing(pydist.as_stream()) as stream:
                        metadata = Metadata(fileobj=stream, scheme='legacy')
                    logger.debug('Found %s', r.path)
                    seen.add(r.path)
                    # NOTE(review): new_dist_class / old_dist_class are not
                    # defined in the part of the file reviewed - presumably
                    # module-level aliases defined further down.
                    yield new_dist_class(r.path, metadata=metadata,
                                         env=self)
                elif self._include_egg and entry.endswith(('.egg-info',
                                                          '.egg')):
                    logger.debug('Found %s', r.path)
                    seen.add(r.path)
                    yield old_dist_class(r.path, self)

    def _generate_cache(self):
        """
        Scan the path for distributions and populate the cache with
        those that are found.

        Nothing is done if the relevant caches have already been generated.
        """
        gen_dist = not self._cache.generated
        gen_egg = self._include_egg and not self._cache_egg.generated
        if gen_dist or gen_egg:
            for dist in self._yield_distributions():
                if isinstance(dist, InstalledDistribution):
                    self._cache.add(dist)
                else:
                    self._cache_egg.add(dist)

            if gen_dist:
                self._cache.generated = True
            if gen_egg:
                self._cache_egg.generated = True

    @classmethod
    def distinfo_dirname(cls, name, version):
        """
        Return the name of the ``.dist-info`` directory for a distribution.

        Any ``'-'`` characters in *name* are replaced with ``'_'``, so that
        the only dashes in the result are the one separating the name from
        the version and the one in ``'dist-info'`` (provided *version*
        itself contains none - it is used exactly as given).

        :parameter name: the distribution name
        :type name: string
        :parameter version: the version string; no normalisation is applied
        :type version: string
        :returns: directory name
        :rtype: string"""
        name = name.replace('-', '_')
        return '-'.join([name, version]) + DISTINFO_EXT

    def get_distributions(self):
        """
        Provides an iterator that looks for distributions and returns
        :class:`InstalledDistribution` or
        :class:`EggInfoDistribution` instances for each one of them.

        With caching enabled the caches are generated if necessary and their
        contents are yielded (.dist-info distributions first, then legacy
        ones if they were asked for); otherwise the path is rescanned.

        :rtype: iterator of :class:`InstalledDistribution` and
                :class:`EggInfoDistribution` instances
        """
        if not self._cache_enabled:
            for dist in self._yield_distributions():
                yield dist
        else:
            self._generate_cache()

            for dist in self._cache.path.values():
                yield dist

            if self._include_egg:
                for dist in self._cache_egg.path.values():
                    yield dist

    def get_distribution(self, name):
        """
        Looks for a named distribution on the path.

        This function only returns the first result found, as no more than one
        value is expected. If nothing is found, ``None`` is returned. The
        name is compared case-insensitively.

        :rtype: :class:`InstalledDistribution`, :class:`EggInfoDistribution`
                or ``None``
        """
        result = None
        name = name.lower()
        if not self._cache_enabled:
            for dist in self._yield_distributions():
                if dist.key == name:
                    result = dist
                    break
        else:
            self._generate_cache()

            if name in self._cache.name:
                result = self._cache.name[name][0]
            elif self._include_egg and name in self._cache_egg.name:
                result = self._cache_egg.name[name][0]
        return result

    def provides_distribution(self, name, version=None):
        """
        Iterates over all distributions to find which distributions provide *name*.
        If a *version* is provided, it will be used to filter the results.

        This is a generator: every distribution which has a matching
        ``provides`` entry is yielded (once), and nothing is yielded if there
        is no match. Names are compared case-insensitively, as in
        :meth:`get_distribution`.

        :parameter name: the name which must be provided
        :parameter version: a version specifier that indicates the version
                            required, conforming to the format in ``PEP-345``

        :type name: string
        :type version: string
        :raises DistlibException: if *name* and *version* cannot be turned
                                  into a matcher. As this is a generator, the
                                  exception is raised when iteration starts.
        """
        matcher = None
        if version is not None:
            try:
                matcher = self._scheme.matcher('%s (%s)' % (name, version))
            except ValueError:
                raise DistlibException('invalid name or version: %r, %r' %
                                       (name, version))

        # Distribution names are matched without regard to case elsewhere in
        # this class, so do the same here rather than silently missing
        # 'Foo' vs. 'foo'.
        wanted = name.lower()
        for dist in self.get_distributions():
            # We hit a problem on Travis where enum34 was installed and doesn't
            # have a provides attribute ...
            if not hasattr(dist, 'provides'):
                logger.debug('No "provides": %s', dist)
                continue

            for p in dist.provides:
                p_name, p_ver = parse_name_and_version(p)
                if p_name.lower() != wanted:
                    continue
                if matcher is None or matcher.match(p_ver):
                    yield dist
                    # One match is enough - don't yield the same
                    # distribution twice.
                    break

    def get_file_path(self, name, relative_path):
        """
        Return the path to a resource file.

        :param name: The name of the distribution which owns the resource.
        :param relative_path: Passed to the distribution's
                              ``get_resource_path`` method.
        :raises LookupError: if no distribution called *name* is found.
        """
        dist = self.get_distribution(name)
        if dist is None:
            raise LookupError('no distribution named %r found' % name)
        return dist.get_resource_path(relative_path)

    def get_exported_entries(self, category, name=None):
        """
        Return all of the exported entries in a particular category.

        This is a generator which looks at the ``exports`` of every
        distribution returned by :meth:`get_distributions`.

        :param category: The category to search for entries.
        :param name: If specified, only entries with that name are returned.
        """
        for dist in self.get_distributions():
            r = dist.exports
            if category in r:
                d = r[category]
                if name is not None:
                    if name in d:
                        yield d[name]
                else:
                    for v in d.values():
                        yield v
315
316
class Distribution(object):
    """
    A base class for distributions, whether installed or from indexes.
    Either way, it must have some metadata, so that's all that's needed
    for construction.
    """

    build_time_dependency = False
    """
    Set to True if it's known to be only a build-time dependency (i.e.
    not needed after installation).
    """

    requested = False
    """A boolean that indicates whether the ``REQUESTED`` metadata file is
    present (in other words, whether the package was installed by user
    request or it was installed as a dependency)."""

    def __init__(self, metadata):
        """
        Initialise an instance.
        :param metadata: The instance of :class:`Metadata` describing this
        distribution.
        """
        self.metadata = metadata
        self.name = metadata.name
        self.key = self.name.lower()    # for case-insensitive comparisons
        self.version = metadata.version
        self.locator = None
        self.digest = None
        self.extras = None      # additional features requested
        self.context = None     # environment marker overrides
        self.download_urls = set()
        self.digests = {}

    @property
    def source_url(self):
        """
        The source archive download URL for this distribution.
        """
        return self.metadata.source_url

    download_url = source_url   # Backward compatibility

    @property
    def name_and_version(self):
        """
        A utility property which displays the name and version in parentheses.
        """
        return '%s (%s)' % (self.name, self.version)

    @property
    def provides(self):
        """
        The distribution names and versions provided by this distribution.

        The object obtained from ``metadata.provides`` is returned; if it
        does not already contain this distribution's own "name (version)"
        entry, that entry is appended to it in place first.

        :return: A list of "name (version)" strings.
        """
        plist = self.metadata.provides
        s = '%s (%s)' % (self.name, self.version)
        if s not in plist:
            plist.append(s)
        return plist

    def _get_requirements(self, req_attr):
        """
        Return the set of requirements held in one attribute of the metadata.

        :param req_attr: The name of the metadata attribute to read (e.g.
                         ``'run_requires'``).
        :return: A set built from ``metadata.get_requirements()``, called
                 with this instance's ``extras`` and ``context``.
        """
        md = self.metadata
        # Only build the dictionary form of the metadata if the message is
        # actually going to be emitted.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug('Getting requirements from metadata %r', md.todict())
        reqts = getattr(md, req_attr)
        return set(md.get_requirements(reqts, extras=self.extras,
                                       env=self.context))

    @property
    def run_requires(self):
        """The requirements in the metadata's ``run_requires``, as a set."""
        return self._get_requirements('run_requires')

    @property
    def meta_requires(self):
        """The requirements in the metadata's ``meta_requires``, as a set."""
        return self._get_requirements('meta_requires')

    @property
    def build_requires(self):
        """The requirements in the metadata's ``build_requires``, as a set."""
        return self._get_requirements('build_requires')

    @property
    def test_requires(self):
        """The requirements in the metadata's ``test_requires``, as a set."""
        return self._get_requirements('test_requires')

    @property
    def dev_requires(self):
        """The requirements in the metadata's ``dev_requires``, as a set."""
        return self._get_requirements('dev_requires')

    def matches_requirement(self, req):
        """
        Say if this instance matches (fulfills) a requirement.
        :param req: The requirement to match.
        :type req: str
        :return: True if it matches, else False.
        """
        # Requirement may contain extras - parse to lose those
        # from what's passed to the matcher
        r = parse_requirement(req)
        scheme = get_scheme(self.metadata.scheme)
        try:
            matcher = scheme.matcher(r.requirement)
        except UnsupportedVersionError:
            # XXX compat-mode if cannot read the version
            logger.warning('could not read version %r - using name only',
                           req)
            name = req.split()[0]
            matcher = scheme.matcher(name)

        name = matcher.key   # case-insensitive

        result = False
        for p in self.provides:
            p_name, p_ver = parse_name_and_version(p)
            if p_name != name:
                continue
            try:
                result = matcher.match(p_ver)
                break
            except UnsupportedVersionError:
                # This provided version can't be interpreted - try the
                # next entry rather than failing.
                pass
        return result

    def __repr__(self):
        """
        Return a textual representation of this instance,
        """
        if self.source_url:
            suffix = ' [%s]' % self.source_url
        else:
            suffix = ''
        return '<Distribution %s (%s)%s>' % (self.name, self.version, suffix)

    def __eq__(self, other):
        """
        See if this distribution is the same as another.
        :param other: The distribution to compare with. To be equal to one
                      another. distributions must have the same type, name,
                      version and source_url.
        :return: True if it is the same, else False.
        """
        if type(other) is not type(self):
            result = False
        else:
            result = (self.name == other.name and
                      self.version == other.version and
                      self.source_url == other.source_url)
        return result

    def __hash__(self):
        """
        Compute hash in a way which matches the equality test.
        """
        # Hash the fields as a tuple so that the order of the values matters
        # (a plain sum would give e.g. name/version swapped the same hash).
        return hash((self.name, self.version, self.source_url))
472
473
class BaseInstalledDistribution(Distribution):
    """
    This is the base class for installed distributions (whether PEP 376 or
    legacy).
    """

    # Name of the default hashlib algorithm used by get_hash(); None means
    # "MD5, with no algorithm prefix on the result".
    hasher = None

    def __init__(self, metadata, path, env=None):
        """
        Initialise an instance.
        :param metadata: An instance of :class:`Metadata` which describes the
                         distribution. This will normally have been initialised
                         from a metadata file in the ``path``.
        :param path:     The path of the ``.dist-info`` or ``.egg-info``
                         directory for the distribution.
        :param env:      This is normally the :class:`DistributionPath`
                         instance where this distribution was found.
        """
        super(BaseInstalledDistribution, self).__init__(metadata)
        self.path = path
        self.dist_path = env

    def get_hash(self, data, hasher=None):
        """
        Get the hash of some data, using a particular hash algorithm, if
        specified.

        :param data: The data to be hashed.
        :type data: bytes
        :param hasher: The name of a hash implementation, supported by hashlib,
                       or ``None``. Examples of valid values are ``'sha1'``,
                       ``'sha224'``, ``'sha384'``, '``sha256'``, ``'md5'`` and
                       ``'sha512'``. If no hasher is specified, the ``hasher``
                       attribute of the instance is used. If the hasher is
                       determined to be ``None``, MD5 is used as the hashing
                       algorithm.
        :returns: The hash of the data, as the URL-safe base64 encoding of
                  the digest with any ``'='`` padding removed. Unless MD5 was
                  used by default (see above), the value is prefixed with the
                  name of the algorithm actually used, followed by '='.
        :rtype: str
        :raises AttributeError: if hashlib has no attribute with the given
                                name.
        """
        if hasher is None:
            hasher = self.hasher
        if hasher is None:
            hash_func = hashlib.md5
            prefix = ''
        else:
            hash_func = getattr(hashlib, hasher)
            # The prefix must name the algorithm which produced the digest,
            # which is not necessarily self.hasher when the caller passed
            # one in explicitly.
            prefix = '%s=' % hasher
        digest = hash_func(data).digest()
        digest = base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii')
        return '%s%s' % (prefix, digest)
527
528
529class InstalledDistribution(BaseInstalledDistribution):
530    """
531    Created with the *path* of the ``.dist-info`` directory provided to the
532    constructor. It reads the metadata contained in ``pydist.json`` when it is
533    instantiated., or uses a passed in Metadata instance (useful for when
534    dry-run mode is being used).
535    """
536
537    hasher = 'sha256'
538
539    def __init__(self, path, metadata=None, env=None):
540        self.modules = []
541        self.finder = finder = resources.finder_for_path(path)
542        if finder is None:
543            raise ValueError('finder unavailable for %s' % path)
544        if env and env._cache_enabled and path in env._cache.path:
545            metadata = env._cache.path[path].metadata
546        elif metadata is None:
547            r = finder.find(METADATA_FILENAME)
548            # Temporary - for Wheel 0.23 support
549            if r is None:
550                r = finder.find(WHEEL_METADATA_FILENAME)
551            # Temporary - for legacy support
552            if r is None:
553                r = finder.find(LEGACY_METADATA_FILENAME)
554            if r is None:
555                raise ValueError('no %s found in %s' % (METADATA_FILENAME,
556                                                        path))
557            with contextlib.closing(r.as_stream()) as stream:
558                metadata = Metadata(fileobj=stream, scheme='legacy')
559
560        super(InstalledDistribution, self).__init__(metadata, path, env)
561
562        if env and env._cache_enabled:
563            env._cache.add(self)
564
565        r = finder.find('REQUESTED')
566        self.requested = r is not None
567        p  = os.path.join(path, 'top_level.txt')
568        if os.path.exists(p):
569            with open(p, 'rb') as f:
570                data = f.read().decode('utf-8')
571            self.modules = data.splitlines()
572
573    def __repr__(self):
574        return '<InstalledDistribution %r %s at %r>' % (
575            self.name, self.version, self.path)
576
577    def __str__(self):
578        return "%s %s" % (self.name, self.version)
579
580    def _get_records(self):
581        """
582        Get the list of installed files for the distribution
583        :return: A list of tuples of path, hash and size. Note that hash and
584                 size might be ``None`` for some entries. The path is exactly
585                 as stored in the file (which is as in PEP 376).
586        """
587        results = []
588        r = self.get_distinfo_resource('RECORD')
589        with contextlib.closing(r.as_stream()) as stream:
590            with CSVReader(stream=stream) as record_reader:
591                # Base location is parent dir of .dist-info dir
592                #base_location = os.path.dirname(self.path)
593                #base_location = os.path.abspath(base_location)
594                for row in record_reader:
595                    missing = [None for i in range(len(row), 3)]
596                    path, checksum, size = row + missing
597                    #if not os.path.isabs(path):
598                    #    path = path.replace('/', os.sep)
599                    #    path = os.path.join(base_location, path)
600                    results.append((path, checksum, size))
601        return results
602
603    @cached_property
604    def exports(self):
605        """
606        Return the information exported by this distribution.
607        :return: A dictionary of exports, mapping an export category to a dict
608                 of :class:`ExportEntry` instances describing the individual
609                 export entries, and keyed by name.
610        """
611        result = {}
612        r = self.get_distinfo_resource(EXPORTS_FILENAME)
613        if r:
614            result = self.read_exports()
615        return result
616
617    def read_exports(self):
618        """
619        Read exports data from a file in .ini format.
620
621        :return: A dictionary of exports, mapping an export category to a list
622                 of :class:`ExportEntry` instances describing the individual
623                 export entries.
624        """
625        result = {}
626        r = self.get_distinfo_resource(EXPORTS_FILENAME)
627        if r:
628            with contextlib.closing(r.as_stream()) as stream:
629                result = read_exports(stream)
630        return result
631
632    def write_exports(self, exports):
633        """
634        Write a dictionary of exports to a file in .ini format.
635        :param exports: A dictionary of exports, mapping an export category to
636                        a list of :class:`ExportEntry` instances describing the
637                        individual export entries.
638        """
639        rf = self.get_distinfo_file(EXPORTS_FILENAME)
640        with open(rf, 'w') as f:
641            write_exports(exports, f)
642
643    def get_resource_path(self, relative_path):
644        """
645        NOTE: This API may change in the future.
646
647        Return the absolute path to a resource file with the given relative
648        path.
649
650        :param relative_path: The path, relative to .dist-info, of the resource
651                              of interest.
652        :return: The absolute path where the resource is to be found.
653        """
654        r = self.get_distinfo_resource('RESOURCES')
655        with contextlib.closing(r.as_stream()) as stream:
656            with CSVReader(stream=stream) as resources_reader:
657                for relative, destination in resources_reader:
658                    if relative == relative_path:
659                        return destination
660        raise KeyError('no resource file with relative path %r '
661                       'is installed' % relative_path)
662
663    def list_installed_files(self):
664        """
665        Iterates over the ``RECORD`` entries and returns a tuple
666        ``(path, hash, size)`` for each line.
667
668        :returns: iterator of (path, hash, size)
669        """
670        for result in self._get_records():
671            yield result
672
673    def write_installed_files(self, paths, prefix, dry_run=False):
674        """
675        Writes the ``RECORD`` file, using the ``paths`` iterable passed in. Any
676        existing ``RECORD`` file is silently overwritten.
677
678        prefix is used to determine when to write absolute paths.
679        """
680        prefix = os.path.join(prefix, '')
681        base = os.path.dirname(self.path)
682        base_under_prefix = base.startswith(prefix)
683        base = os.path.join(base, '')
684        record_path = self.get_distinfo_file('RECORD')
685        logger.info('creating %s', record_path)
686        if dry_run:
687            return None
688        with CSVWriter(record_path) as writer:
689            for path in paths:
690                if os.path.isdir(path) or path.endswith(('.pyc', '.pyo')):
691                    # do not put size and hash, as in PEP-376
692                    hash_value = size = ''
693                else:
694                    size = '%d' % os.path.getsize(path)
695                    with open(path, 'rb') as fp:
696                        hash_value = self.get_hash(fp.read())
697                if path.startswith(base) or (base_under_prefix and
698                                             path.startswith(prefix)):
699                    path = os.path.relpath(path, base)
700                writer.writerow((path, hash_value, size))
701
702            # add the RECORD file itself
703            if record_path.startswith(base):
704                record_path = os.path.relpath(record_path, base)
705            writer.writerow((record_path, '', ''))
706        return record_path
707
708    def check_installed_files(self):
709        """
710        Checks that the hashes and sizes of the files in ``RECORD`` are
711        matched by the files themselves. Returns a (possibly empty) list of
712        mismatches. Each entry in the mismatch list will be a tuple consisting
713        of the path, 'exists', 'size' or 'hash' according to what didn't match
714        (existence is checked first, then size, then hash), the expected
715        value and the actual value.
716        """
717        mismatches = []
718        base = os.path.dirname(self.path)
719        record_path = self.get_distinfo_file('RECORD')
720        for path, hash_value, size in self.list_installed_files():
721            if not os.path.isabs(path):
722                path = os.path.join(base, path)
723            if path == record_path:
724                continue
725            if not os.path.exists(path):
726                mismatches.append((path, 'exists', True, False))
727            elif os.path.isfile(path):
728                actual_size = str(os.path.getsize(path))
729                if size and actual_size != size:
730                    mismatches.append((path, 'size', size, actual_size))
731                elif hash_value:
732                    if '=' in hash_value:
733                        hasher = hash_value.split('=', 1)[0]
734                    else:
735                        hasher = None
736
737                    with open(path, 'rb') as f:
738                        actual_hash = self.get_hash(f.read(), hasher)
739                        if actual_hash != hash_value:
740                            mismatches.append((path, 'hash', hash_value, actual_hash))
741        return mismatches
742
743    @cached_property
744    def shared_locations(self):
745        """
746        A dictionary of shared locations whose keys are in the set 'prefix',
747        'purelib', 'platlib', 'scripts', 'headers', 'data' and 'namespace'.
748        The corresponding value is the absolute path of that category for
749        this distribution, and takes into account any paths selected by the
750        user at installation time (e.g. via command-line arguments). In the
751        case of the 'namespace' key, this would be a list of absolute paths
752        for the roots of namespace packages in this distribution.
753
754        The first time this property is accessed, the relevant information is
755        read from the SHARED file in the .dist-info directory.
756        """
757        result = {}
758        shared_path = os.path.join(self.path, 'SHARED')
759        if os.path.isfile(shared_path):
760            with codecs.open(shared_path, 'r', encoding='utf-8') as f:
761                lines = f.read().splitlines()
762            for line in lines:
763                key, value = line.split('=', 1)
764                if key == 'namespace':
765                    result.setdefault(key, []).append(value)
766                else:
767                    result[key] = value
768        return result
769
770    def write_shared_locations(self, paths, dry_run=False):
771        """
772        Write shared location information to the SHARED file in .dist-info.
773        :param paths: A dictionary as described in the documentation for
774        :meth:`shared_locations`.
775        :param dry_run: If True, the action is logged but no file is actually
776                        written.
777        :return: The path of the file written to.
778        """
779        shared_path = os.path.join(self.path, 'SHARED')
780        logger.info('creating %s', shared_path)
781        if dry_run:
782            return None
783        lines = []
784        for key in ('prefix', 'lib', 'headers', 'scripts', 'data'):
785            path = paths[key]
786            if os.path.isdir(paths[key]):
787                lines.append('%s=%s' % (key,  path))
788        for ns in paths.get('namespace', ()):
789            lines.append('namespace=%s' % ns)
790
791        with codecs.open(shared_path, 'w', encoding='utf-8') as f:
792            f.write('\n'.join(lines))
793        return shared_path
794
795    def get_distinfo_resource(self, path):
796        if path not in DIST_FILES:
797            raise DistlibException('invalid path for a dist-info file: '
798                                   '%r at %r' % (path, self.path))
799        finder = resources.finder_for_path(self.path)
800        if finder is None:
801            raise DistlibException('Unable to get a finder for %s' % self.path)
802        return finder.find(path)
803
804    def get_distinfo_file(self, path):
805        """
806        Returns a path located under the ``.dist-info`` directory. Returns a
807        string representing the path.
808
809        :parameter path: a ``'/'``-separated path relative to the
810                         ``.dist-info`` directory or an absolute path;
811                         If *path* is an absolute path and doesn't start
812                         with the ``.dist-info`` directory path,
813                         a :class:`DistlibException` is raised
814        :type path: str
815        :rtype: str
816        """
817        # Check if it is an absolute path  # XXX use relpath, add tests
818        if path.find(os.sep) >= 0:
819            # it's an absolute path?
820            distinfo_dirname, path = path.split(os.sep)[-2:]
821            if distinfo_dirname != self.path.split(os.sep)[-1]:
822                raise DistlibException(
823                    'dist-info file %r does not belong to the %r %s '
824                    'distribution' % (path, self.name, self.version))
825
826        # The file must be relative
827        if path not in DIST_FILES:
828            raise DistlibException('invalid path for a dist-info file: '
829                                   '%r at %r' % (path, self.path))
830
831        return os.path.join(self.path, path)
832
833    def list_distinfo_files(self):
834        """
835        Iterates over the ``RECORD`` entries and returns paths for each line if
836        the path is pointing to a file located in the ``.dist-info`` directory
837        or one of its subdirectories.
838
839        :returns: iterator of paths
840        """
841        base = os.path.dirname(self.path)
842        for path, checksum, size in self._get_records():
843            # XXX add separator or use real relpath algo
844            if not os.path.isabs(path):
845                path = os.path.join(base, path)
846            if path.startswith(self.path):
847                yield path
848
849    def __eq__(self, other):
850        return (isinstance(other, InstalledDistribution) and
851                self.path == other.path)
852
853    # See http://docs.python.org/reference/datamodel#object.__hash__
854    __hash__ = object.__hash__
855
856
class EggInfoDistribution(BaseInstalledDistribution):
    """Created with the *path* of the ``.egg-info`` directory or file provided
    to the constructor. It reads the metadata contained in the file itself, or
    if the given path happens to be a directory, the metadata is read from the
    file ``PKG-INFO`` under that directory.

    Paths ending in ``.egg`` (a directory or a zip file) are accepted as
    well; their metadata is read from ``EGG-INFO/PKG-INFO``."""

    requested = True    # as we have no way of knowing, assume it was
    shared_locations = {}

    def __init__(self, path, env=None):
        """
        Initialise an instance.

        :param path: The path of the ``.egg-info`` file or directory, or of
                     the ``.egg`` file or directory.
        :param env: An optional object providing ``_cache_enabled`` and
                    ``_cache_egg`` attributes (presumably the owning
                    :class:`DistributionPath` -- confirm against callers).
                    When its cache is enabled, metadata already cached for
                    *path* is reused, and a distribution which had to be
                    read from disk is added to the cache.
        """
        def set_name_and_version(s, n, v):
            s.name = n
            s.key = n.lower()   # for case-insensitive comparisons
            s.version = v

        self.path = path
        self.dist_path = env
        if env and env._cache_enabled and path in env._cache_egg.path:
            cached = env._cache_egg.path[path]
            metadata = cached.metadata
            set_name_and_version(self, metadata.name, metadata.version)
            # _get_metadata() is what normally sets ``modules``; as it is
            # skipped on a cache hit, take the names from the cached
            # distribution so that the attribute always exists.
            self.modules = list(getattr(cached, 'modules', ()))
        else:
            metadata = self._get_metadata(path)

            # Need to be set before caching
            set_name_and_version(self, metadata.name, metadata.version)

            if env and env._cache_enabled:
                env._cache_egg.add(self)
        super(EggInfoDistribution, self).__init__(metadata, path, env)

    def _get_metadata(self, path):
        """
        Read the legacy metadata for the egg at *path*.

        As a side effect, ``self.modules`` is set to the list of lines read
        from ``top_level.txt`` (an empty list if that file is absent or
        empty).

        :param path: A path ending in ``.egg`` (directory or zip file) or in
                     ``.egg-info`` (directory or file).
        :return: The :class:`Metadata` instance, with any requirements found
                 in ``requires.txt`` added to it.
        :raises DistlibException: if *path* ends with neither suffix.
        """
        requires = None

        def parse_requires_data(data):
            """Create a list of dependencies from a requires.txt file.

            *data*: the contents of a setuptools-produced requires.txt file.

            Scanning stops at the first line starting with ``[``; lines
            which cannot be parsed as a requirement are logged and skipped.
            """
            reqs = []
            lines = data.splitlines()
            for line in lines:
                line = line.strip()
                if line.startswith('['):
                    logger.warning('Unexpected line: quitting requirement scan: %r',
                                   line)
                    break
                r = parse_requirement(line)
                if not r:
                    logger.warning('Not recognised as a requirement: %r', line)
                    continue
                if r.extras:
                    logger.warning('extra requirements in requires.txt are '
                                   'not supported')
                if not r.constraints:
                    reqs.append(r.name)
                else:
                    cons = ', '.join('%s%s' % c for c in r.constraints)
                    reqs.append('%s (%s)' % (r.name, cons))
            return reqs

        def parse_requires_path(req_path):
            """Create a list of dependencies from a requires.txt file.

            *req_path*: the path to a setuptools-produced requires.txt file.

            An unreadable or missing file yields an empty list.
            """

            reqs = []
            try:
                with codecs.open(req_path, 'r', 'utf-8') as fp:
                    reqs = parse_requires_data(fp.read())
            except IOError:
                pass
            return reqs

        tl_path = tl_data = None
        if path.endswith('.egg'):
            if os.path.isdir(path):
                p = os.path.join(path, 'EGG-INFO')
                meta_path = os.path.join(p, 'PKG-INFO')
                metadata = Metadata(path=meta_path, scheme='legacy')
                req_path = os.path.join(p, 'requires.txt')
                tl_path = os.path.join(p, 'top_level.txt')
                requires = parse_requires_path(req_path)
            else:
                # FIXME handle the case where zipfile is not available
                zipf = zipimport.zipimporter(path)
                fileobj = StringIO(
                    zipf.get_data('EGG-INFO/PKG-INFO').decode('utf8'))
                metadata = Metadata(fileobj=fileobj, scheme='legacy')
                # requires.txt and top_level.txt are looked up separately,
                # so that the absence of one does not discard the other.
                try:
                    data = zipf.get_data('EGG-INFO/requires.txt')
                except IOError:
                    requires = None
                else:
                    requires = parse_requires_data(data.decode('utf-8'))
                try:
                    tl_data = zipf.get_data(
                        'EGG-INFO/top_level.txt').decode('utf-8')
                except IOError:
                    tl_data = None
        elif path.endswith('.egg-info'):
            if os.path.isdir(path):
                req_path = os.path.join(path, 'requires.txt')
                requires = parse_requires_path(req_path)
                # Must be computed while *path* still names the directory:
                # it is rebound to the PKG-INFO file on the next line.
                tl_path = os.path.join(path, 'top_level.txt')
                path = os.path.join(path, 'PKG-INFO')
            metadata = Metadata(path=path, scheme='legacy')
        else:
            raise DistlibException('path must end with .egg-info or .egg, '
                                   'got %r' % path)

        if requires:
            metadata.add_requirements(requires)
        # look for top-level modules in top_level.txt, if present
        if tl_data is None:
            if tl_path is not None and os.path.exists(tl_path):
                with open(tl_path, 'rb') as f:
                    tl_data = f.read().decode('utf-8')
        if not tl_data:
            tl_data = []
        else:
            tl_data = tl_data.splitlines()
        self.modules = tl_data
        return metadata

    def __repr__(self):
        return '<EggInfoDistribution %r %s at %r>' % (
            self.name, self.version, self.path)

    def __str__(self):
        return "%s %s" % (self.name, self.version)

    def check_installed_files(self):
        """
        Checks that the files listed in ``installed-files.txt`` exist.
        Returns a (possibly empty) list of mismatches. Each entry in the
        mismatch list is a tuple ``(path, 'exists', True, False)``: the path,
        what didn't match, the expected value and the actual value. Only
        existence is checked here; hashes and sizes are not compared.

        If there is no ``installed-files.txt`` the list is empty. The entry
        for ``installed-files.txt`` itself is not checked.
        """
        mismatches = []
        record_path = os.path.join(self.path, 'installed-files.txt')
        if os.path.exists(record_path):
            for path, _, _ in self.list_installed_files():
                if path == record_path:
                    continue
                if not os.path.exists(path):
                    mismatches.append((path, 'exists', True, False))
        return mismatches

    def list_installed_files(self):
        """
        Iterates over the ``installed-files.txt`` entries and returns a tuple
        ``(path, hash, size)`` for each line.

        Each line is resolved relative to the ``.egg-info`` path and
        normalised; directories are left out. The hash is the MD5 hex digest
        of the file's contents. A final ``(record_path, None, None)`` entry
        is added for ``installed-files.txt`` itself. A missing file is
        logged; missing ``.pyc``/``.pyo`` files are then skipped, whereas any
        other missing file leads to the error raised when trying to read it.

        :returns: a list of (path, hash, size)
        """

        def _md5(path):
            with open(path, 'rb') as f:
                content = f.read()
            return hashlib.md5(content).hexdigest()

        def _size(path):
            return os.stat(path).st_size

        record_path = os.path.join(self.path, 'installed-files.txt')
        result = []
        if os.path.exists(record_path):
            with codecs.open(record_path, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    p = os.path.normpath(os.path.join(self.path, line))
                    # "./" is present as a marker between installed files
                    # and installation metadata files
                    if not os.path.exists(p):
                        logger.warning('Non-existent file: %s', p)
                        if p.endswith(('.pyc', '.pyo')):
                            continue
                        # otherwise fall through and fail
                    if not os.path.isdir(p):
                        result.append((p, _md5(p), _size(p)))
            result.append((record_path, None, None))
        return result

    def list_distinfo_files(self, absolute=False):
        """
        Iterates over the ``installed-files.txt`` entries and returns paths for
        each line if the path is pointing to a file located in the
        ``.egg-info`` directory or one of its subdirectories.

        Only the lines which follow the ``./`` marker line are considered.

        :parameter absolute: If *absolute* is ``True``, each returned path is
                          transformed into a local absolute path. Otherwise the
                          raw value from ``installed-files.txt`` is returned.
        :type absolute: boolean
        :returns: iterator of paths
        """
        record_path = os.path.join(self.path, 'installed-files.txt')
        if os.path.exists(record_path):
            skip = True
            with codecs.open(record_path, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    if line == './':
                        skip = False
                        continue
                    if not skip:
                        p = os.path.normpath(os.path.join(self.path, line))
                        if p.startswith(self.path):
                            if absolute:
                                yield p
                            else:
                                yield line

    def __eq__(self, other):
        """Equal when *other* is an :class:`EggInfoDistribution` with the
        same ``path``."""
        return (isinstance(other, EggInfoDistribution) and
                self.path == other.path)

    # See http://docs.python.org/reference/datamodel#object.__hash__
    __hash__ = object.__hash__
1076
# Module-level aliases for the two installed-distribution classes.
# NOTE(review): presumably "new" refers to the .dist-info format and "old"
# to the legacy .egg-info/.egg format -- confirm against the callers.
new_dist_class = InstalledDistribution
old_dist_class = EggInfoDistribution
1079
1080
class DependencyGraph(object):
    """
    Represents a dependency graph between distributions.

    The dependency relationships are stored in an ``adjacency_list`` that maps
    distributions to a list of ``(other, label)`` tuples where  ``other``
    is a distribution and the edge is labeled with ``label`` (i.e. the version
    specifier, if such was provided). Also, for more efficient traversal, for
    every distribution ``x``, a list of predecessors is kept in
    ``reverse_list[x]``. An edge from distribution ``a`` to
    distribution ``b`` means that ``a`` depends on ``b``. If any missing
    dependencies are found, they are stored in ``missing``, which is a
    dictionary that maps distributions to a list of requirements that were not
    provided by any other distributions.

    Distributions are used as dictionary keys, so they must be hashable.
    """

    def __init__(self):
        self.adjacency_list = {}
        self.reverse_list = {}
        self.missing = {}

    def add_distribution(self, distribution):
        """Add the *distribution* to the graph.

        Any edges previously recorded for it are reset.

        :type distribution: :class:`InstalledDistribution` or
                            :class:`EggInfoDistribution`
        """
        self.adjacency_list[distribution] = []
        self.reverse_list[distribution] = []

    def add_edge(self, x, y, label=None):
        """Add an edge from distribution *x* to distribution *y* with the given
        *label*.

        Both distributions must already have been added to the graph.

        :type x: :class:`InstalledDistribution` or
                 :class:`EggInfoDistribution`
        :type y: :class:`InstalledDistribution` or
                 :class:`EggInfoDistribution`
        :type label: ``str`` or ``None``
        """
        self.adjacency_list[x].append((y, label))
        # multiple edges are allowed, so be careful
        if x not in self.reverse_list[y]:
            self.reverse_list[y].append(x)

    def add_missing(self, distribution, requirement):
        """
        Add a missing *requirement* for the given *distribution*.

        :type distribution: :class:`InstalledDistribution` or
                            :class:`EggInfoDistribution`
        :type requirement: ``str``
        """
        logger.debug('%s missing %r', distribution, requirement)
        self.missing.setdefault(distribution, []).append(requirement)

    def _repr_dist(self, dist):
        return '%s %s' % (dist.name, dist.version)

    def repr_node(self, dist, level=1, _ancestors=()):
        """Return a textual tree of the subgraph reachable from *dist*.

        The first line describes *dist*; every dependency follows on its own
        line, indented by four spaces per nesting level and suffixed with
        ``[label]`` when its edge has a label.

        :param dist: The distribution at the root of the subgraph.
        :param level: The indentation level used for the direct
                      dependencies of *dist*.
        :param _ancestors: Private; the chain of distributions currently
                           being expanded. A dependency which is already in
                           that chain is listed but not expanded again, so
                           that graphs containing cycles can be rendered.
        """
        chain = tuple(_ancestors) + (dist,)
        output = [self._repr_dist(dist)]
        for other, label in self.adjacency_list[dist]:
            text = self._repr_dist(other)
            if label is not None:
                text = '%s [%s]' % (text, label)
            output.append('    ' * level + str(text))
            if other in chain:
                # Cycle: expanding *other* again would never terminate.
                continue
            suboutput = self.repr_node(other, level + 1, chain)
            # The first line repeats *other*, which was just emitted.
            output.extend(suboutput.split('\n')[1:])
        return '\n'.join(output)

    def to_dot(self, f, skip_disconnected=True):
        """Writes a DOT output for the graph to the provided file *f*.

        If *skip_disconnected* is set to ``True``, then all distributions
        that are not dependent on any other distribution are skipped.
        Otherwise they are written in a ``disconnected`` subgraph.

        :type f: has to support ``file``-like operations
        :type skip_disconnected: ``bool``
        """
        disconnected = []

        f.write("digraph dependencies {\n")
        for dist, adjs in self.adjacency_list.items():
            if not adjs and not skip_disconnected:
                disconnected.append(dist)
            for other, label in adjs:
                if label is not None:
                    f.write('"%s" -> "%s" [label="%s"]\n' %
                            (dist.name, other.name, label))
                else:
                    f.write('"%s" -> "%s"\n' % (dist.name, other.name))
        if not skip_disconnected and disconnected:
            f.write('subgraph disconnected {\n')
            f.write('label = "Disconnected"\n')
            f.write('bgcolor = red\n')

            for dist in disconnected:
                f.write('"%s"' % dist.name)
                f.write('\n')
            f.write('}\n')
        f.write('}\n')

    def topological_sort(self):
        """
        Perform a topological sort of the graph.

        Distributions without (remaining) dependencies come first. The graph
        itself is not modified.

        :return: A tuple, the first element of which is a topologically sorted
                 list of distributions, and the second element of which is a
                 list of distributions that cannot be sorted because they have
                 circular dependencies and so form a cycle.
        """
        result = []
        # Make a shallow copy of the adjacency list
        alist = {}
        for k, v in self.adjacency_list.items():
            alist[k] = v[:]
        while True:
            # See what we can remove in this run
            to_remove = [k for k, v in alist.items() if not v]
            if not to_remove:
                # What's left in alist (if anything) is a cycle.
                break
            for k in to_remove:
                del alist[k]
            # Remove from the adjacency list of others
            for k, v in alist.items():
                alist[k] = [(d, r) for d, r in v if d not in to_remove]
            logger.debug('Moving to result: %s',
                         ['%s (%s)' % (d.name, d.version) for d in to_remove])
            result.extend(to_remove)
        return result, list(alist.keys())

    def __repr__(self):
        """Representation of the graph"""
        return '\n'.join(self.repr_node(dist) for dist in self.adjacency_list)
1223
1224
def make_graph(dists, scheme='default'):
    """Build a :class:`DependencyGraph` from the given distributions.

    Every distribution becomes a node. For each requirement of a
    distribution, an edge is added to the first distribution found which
    provides a matching name and version; a requirement nobody satisfies is
    recorded as missing for that distribution.

    :parameter dists: a list of distributions (it is iterated over twice)
    :type dists: list of :class:`InstalledDistribution` and
                 :class:`EggInfoDistribution` instances
    :parameter scheme: the name passed to ``get_scheme`` to obtain the
                       object used to create requirement matchers
    :rtype: a :class:`DependencyGraph` instance
    """
    scheme = get_scheme(scheme)
    graph = DependencyGraph()
    providers = {}  # maps names to lists of (version, dist) tuples

    # Pass 1: register each distribution and index what it provides
    for dist in dists:
        graph.add_distribution(dist)

        for provision in dist.provides:
            name, version = parse_name_and_version(provision)
            logger.debug('Add to provided: %s, %s, %s', name, version, dist)
            providers.setdefault(name, []).append((version, dist))

    # Pass 2: link each requirement to the first provider satisfying it
    for dist in dists:
        wanted = (dist.run_requires | dist.meta_requires |
                  dist.build_requires | dist.dev_requires)
        for req in wanted:
            try:
                matcher = scheme.matcher(req)
            except UnsupportedVersionError:
                # XXX compat-mode if cannot read the version
                logger.warning('could not read version %r - using name only',
                               req)
                matcher = scheme.matcher(req.split()[0])

            key = matcher.key   # case-insensitive

            for version, provider in providers.get(key, ()):
                try:
                    satisfied = matcher.match(version)
                except UnsupportedVersionError:
                    satisfied = False

                if satisfied:
                    graph.add_edge(dist, provider, req)
                    break
            else:
                # No provider at all, or none with an acceptable version
                graph.add_missing(dist, req)
    return graph
1277
1278
def get_dependent_dists(dists, dist):
    """Recursively generate a list of distributions from *dists* that are
    dependent on *dist*.

    :param dists: a list of distributions
    :param dist: a distribution, member of *dists* for which we are interested
    :return: a list of the distributions which depend, directly or
             indirectly, on *dist*. Each one appears once, and *dist* itself
             is not included.
    :raises DistlibException: if *dist* is not a member of *dists*.
    """
    if dist not in dists:
        raise DistlibException('given distribution %r is not a member '
                               'of the list' % dist.name)
    graph = make_graph(dists)

    # *dist* is kept at the front while walking, so that a cycle leading
    # back to it is not followed; it is dropped again before returning.
    dep = [dist]  # dependent distributions
    # Work on a copy so the graph's own list is not consumed by pop()
    todo = list(graph.reverse_list[dist])  # list of nodes we should inspect

    while todo:
        d = todo.pop()
        if d in dep:
            # Reached through more than one path and already recorded
            continue
        dep.append(d)
        for succ in graph.reverse_list[d]:
            if succ not in dep:
                todo.append(succ)

    return dep[1:]
1303
1304
def get_required_dists(dists, dist):
    """Recursively generate a list of distributions from *dists* that are
    required by *dist*.

    :param dists: a list of distributions
    :param dist: a distribution, member of *dists* for which we are interested
    :return: a list of the distributions which *dist* requires, directly or
             indirectly. Each one appears once. *dist* itself only appears
             if it is part of a dependency cycle.
    :raises DistlibException: if *dist* is not a member of *dists*.
    """
    if dist not in dists:
        raise DistlibException('given distribution %r is not a member '
                               'of the list' % dist.name)
    graph = make_graph(dists)

    req = []  # required distributions
    # The adjacency list holds (distribution, label) tuples. Work on a copy
    # so the graph's own list is not consumed by pop().
    todo = list(graph.adjacency_list[dist])  # list of edges we should inspect

    while todo:
        d = todo.pop()[0]
        if d in req:
            # Reached through more than one path (or through a cycle) and
            # already recorded
            continue
        req.append(d)
        for pred in graph.adjacency_list[d]:
            # Compare the distribution, not the whole (dist, label) edge
            if pred[0] not in req:
                todo.append(pred)

    return req
1328
1329
def make_dist(name, version, **kwargs):
    """
    Convenience function which builds a dist from just a name and version.

    :param name: The name to set on the metadata.
    :param version: The version to set on the metadata.
    :param kwargs: An optional ``summary`` entry is used as the summary (a
                   placeholder text is used when it is absent or empty);
                   everything else is passed on to :class:`Metadata`.
    :return: A :class:`Distribution` wrapping the new metadata.
    """
    placeholder = 'Placeholder for summary'
    summary = kwargs.pop('summary', None) or placeholder
    metadata = Metadata(**kwargs)
    metadata.name = name
    metadata.version = version
    metadata.summary = summary
    return Distribution(metadata)
1340