1# -*- coding: utf-8 -*- 2# 3# Copyright (C) 2012-2017 The Python Software Foundation. 4# See LICENSE.txt and CONTRIBUTORS.txt. 5# 6"""PEP 376 implementation.""" 7 8from __future__ import unicode_literals 9 10import base64 11import codecs 12import contextlib 13import hashlib 14import logging 15import os 16import posixpath 17import sys 18import zipimport 19 20from . import DistlibException, resources 21from .compat import StringIO 22from .version import get_scheme, UnsupportedVersionError 23from .metadata import (Metadata, METADATA_FILENAME, WHEEL_METADATA_FILENAME, 24 LEGACY_METADATA_FILENAME) 25from .util import (parse_requirement, cached_property, parse_name_and_version, 26 read_exports, write_exports, CSVReader, CSVWriter) 27 28 29__all__ = ['Distribution', 'BaseInstalledDistribution', 30 'InstalledDistribution', 'EggInfoDistribution', 31 'DistributionPath'] 32 33 34logger = logging.getLogger(__name__) 35 36EXPORTS_FILENAME = 'pydist-exports.json' 37COMMANDS_FILENAME = 'pydist-commands.json' 38 39DIST_FILES = ('INSTALLER', METADATA_FILENAME, 'RECORD', 'REQUESTED', 40 'RESOURCES', EXPORTS_FILENAME, 'SHARED') 41 42DISTINFO_EXT = '.dist-info' 43 44 45class _Cache(object): 46 """ 47 A simple cache mapping names and .dist-info paths to distributions 48 """ 49 def __init__(self): 50 """ 51 Initialise an instance. There is normally one for each DistributionPath. 52 """ 53 self.name = {} 54 self.path = {} 55 self.generated = False 56 57 def clear(self): 58 """ 59 Clear the cache, setting it to its initial state. 60 """ 61 self.name.clear() 62 self.path.clear() 63 self.generated = False 64 65 def add(self, dist): 66 """ 67 Add a distribution to the cache. 68 :param dist: The distribution to add. 69 """ 70 if dist.path not in self.path: 71 self.path[dist.path] = dist 72 self.name.setdefault(dist.key, []).append(dist) 73 74 75class DistributionPath(object): 76 """ 77 Represents a set of distributions installed on a path (typically sys.path). 78 """ 79 def __init__(self, path=None, include_egg=False): 80 """ 81 Create an instance from a path, optionally including legacy (distutils/ 82 setuptools/distribute) distributions. 83 :param path: The path to use, as a list of directories. If not specified, 84 sys.path is used. 85 :param include_egg: If True, this instance will look for and return legacy 86 distributions as well as those based on PEP 376. 87 """ 88 if path is None: 89 path = sys.path 90 self.path = path 91 self._include_dist = True 92 self._include_egg = include_egg 93 94 self._cache = _Cache() 95 self._cache_egg = _Cache() 96 self._cache_enabled = True 97 self._scheme = get_scheme('default') 98 99 def _get_cache_enabled(self): 100 return self._cache_enabled 101 102 def _set_cache_enabled(self, value): 103 self._cache_enabled = value 104 105 cache_enabled = property(_get_cache_enabled, _set_cache_enabled) 106 107 def clear_cache(self): 108 """ 109 Clears the internal cache. 110 """ 111 self._cache.clear() 112 self._cache_egg.clear() 113 114 115 def _yield_distributions(self): 116 """ 117 Yield .dist-info and/or .egg(-info) distributions. 118 """ 119 # We need to check if we've seen some resources already, because on 120 # some Linux systems (e.g. some Debian/Ubuntu variants) there are 121 # symlinks which alias other files in the environment. 122 seen = set() 123 for path in self.path: 124 finder = resources.finder_for_path(path) 125 if finder is None: 126 continue 127 r = finder.find('') 128 if not r or not r.is_container: 129 continue 130 rset = sorted(r.resources) 131 for entry in rset: 132 r = finder.find(entry) 133 if not r or r.path in seen: 134 continue 135 if self._include_dist and entry.endswith(DISTINFO_EXT): 136 possible_filenames = [METADATA_FILENAME, 137 WHEEL_METADATA_FILENAME, 138 LEGACY_METADATA_FILENAME] 139 for metadata_filename in possible_filenames: 140 metadata_path = posixpath.join(entry, metadata_filename) 141 pydist = finder.find(metadata_path) 142 if pydist: 143 break 144 else: 145 continue 146 147 with contextlib.closing(pydist.as_stream()) as stream: 148 metadata = Metadata(fileobj=stream, scheme='legacy') 149 logger.debug('Found %s', r.path) 150 seen.add(r.path) 151 yield new_dist_class(r.path, metadata=metadata, 152 env=self) 153 elif self._include_egg and entry.endswith(('.egg-info', 154 '.egg')): 155 logger.debug('Found %s', r.path) 156 seen.add(r.path) 157 yield old_dist_class(r.path, self) 158 159 def _generate_cache(self): 160 """ 161 Scan the path for distributions and populate the cache with 162 those that are found. 163 """ 164 gen_dist = not self._cache.generated 165 gen_egg = self._include_egg and not self._cache_egg.generated 166 if gen_dist or gen_egg: 167 for dist in self._yield_distributions(): 168 if isinstance(dist, InstalledDistribution): 169 self._cache.add(dist) 170 else: 171 self._cache_egg.add(dist) 172 173 if gen_dist: 174 self._cache.generated = True 175 if gen_egg: 176 self._cache_egg.generated = True 177 178 @classmethod 179 def distinfo_dirname(cls, name, version): 180 """ 181 The *name* and *version* parameters are converted into their 182 filename-escaped form, i.e. any ``'-'`` characters are replaced 183 with ``'_'`` other than the one in ``'dist-info'`` and the one 184 separating the name from the version number. 185 186 :parameter name: is converted to a standard distribution name by replacing 187 any runs of non- alphanumeric characters with a single 188 ``'-'``. 189 :type name: string 190 :parameter version: is converted to a standard version string. Spaces 191 become dots, and all other non-alphanumeric characters 192 (except dots) become dashes, with runs of multiple 193 dashes condensed to a single dash. 194 :type version: string 195 :returns: directory name 196 :rtype: string""" 197 name = name.replace('-', '_') 198 return '-'.join([name, version]) + DISTINFO_EXT 199 200 def get_distributions(self): 201 """ 202 Provides an iterator that looks for distributions and returns 203 :class:`InstalledDistribution` or 204 :class:`EggInfoDistribution` instances for each one of them. 205 206 :rtype: iterator of :class:`InstalledDistribution` and 207 :class:`EggInfoDistribution` instances 208 """ 209 if not self._cache_enabled: 210 for dist in self._yield_distributions(): 211 yield dist 212 else: 213 self._generate_cache() 214 215 for dist in self._cache.path.values(): 216 yield dist 217 218 if self._include_egg: 219 for dist in self._cache_egg.path.values(): 220 yield dist 221 222 def get_distribution(self, name): 223 """ 224 Looks for a named distribution on the path. 225 226 This function only returns the first result found, as no more than one 227 value is expected. If nothing is found, ``None`` is returned. 228 229 :rtype: :class:`InstalledDistribution`, :class:`EggInfoDistribution` 230 or ``None`` 231 """ 232 result = None 233 name = name.lower() 234 if not self._cache_enabled: 235 for dist in self._yield_distributions(): 236 if dist.key == name: 237 result = dist 238 break 239 else: 240 self._generate_cache() 241 242 if name in self._cache.name: 243 result = self._cache.name[name][0] 244 elif self._include_egg and name in self._cache_egg.name: 245 result = self._cache_egg.name[name][0] 246 return result 247 248 def provides_distribution(self, name, version=None): 249 """ 250 Iterates over all distributions to find which distributions provide *name*. 251 If a *version* is provided, it will be used to filter the results. 252 253 This function only returns the first result found, since no more than 254 one values are expected. If the directory is not found, returns ``None``. 255 256 :parameter version: a version specifier that indicates the version 257 required, conforming to the format in ``PEP-345`` 258 259 :type name: string 260 :type version: string 261 """ 262 matcher = None 263 if version is not None: 264 try: 265 matcher = self._scheme.matcher('%s (%s)' % (name, version)) 266 except ValueError: 267 raise DistlibException('invalid name or version: %r, %r' % 268 (name, version)) 269 270 for dist in self.get_distributions(): 271 # We hit a problem on Travis where enum34 was installed and doesn't 272 # have a provides attribute ... 273 if not hasattr(dist, 'provides'): 274 logger.debug('No "provides": %s', dist) 275 else: 276 provided = dist.provides 277 278 for p in provided: 279 p_name, p_ver = parse_name_and_version(p) 280 if matcher is None: 281 if p_name == name: 282 yield dist 283 break 284 else: 285 if p_name == name and matcher.match(p_ver): 286 yield dist 287 break 288 289 def get_file_path(self, name, relative_path): 290 """ 291 Return the path to a resource file. 292 """ 293 dist = self.get_distribution(name) 294 if dist is None: 295 raise LookupError('no distribution named %r found' % name) 296 return dist.get_resource_path(relative_path) 297 298 def get_exported_entries(self, category, name=None): 299 """ 300 Return all of the exported entries in a particular category. 301 302 :param category: The category to search for entries. 303 :param name: If specified, only entries with that name are returned. 304 """ 305 for dist in self.get_distributions(): 306 r = dist.exports 307 if category in r: 308 d = r[category] 309 if name is not None: 310 if name in d: 311 yield d[name] 312 else: 313 for v in d.values(): 314 yield v 315 316 317class Distribution(object): 318 """ 319 A base class for distributions, whether installed or from indexes. 320 Either way, it must have some metadata, so that's all that's needed 321 for construction. 322 """ 323 324 build_time_dependency = False 325 """ 326 Set to True if it's known to be only a build-time dependency (i.e. 327 not needed after installation). 328 """ 329 330 requested = False 331 """A boolean that indicates whether the ``REQUESTED`` metadata file is 332 present (in other words, whether the package was installed by user 333 request or it was installed as a dependency).""" 334 335 def __init__(self, metadata): 336 """ 337 Initialise an instance. 338 :param metadata: The instance of :class:`Metadata` describing this 339 distribution. 340 """ 341 self.metadata = metadata 342 self.name = metadata.name 343 self.key = self.name.lower() # for case-insensitive comparisons 344 self.version = metadata.version 345 self.locator = None 346 self.digest = None 347 self.extras = None # additional features requested 348 self.context = None # environment marker overrides 349 self.download_urls = set() 350 self.digests = {} 351 352 @property 353 def source_url(self): 354 """ 355 The source archive download URL for this distribution. 356 """ 357 return self.metadata.source_url 358 359 download_url = source_url # Backward compatibility 360 361 @property 362 def name_and_version(self): 363 """ 364 A utility property which displays the name and version in parentheses. 365 """ 366 return '%s (%s)' % (self.name, self.version) 367 368 @property 369 def provides(self): 370 """ 371 A set of distribution names and versions provided by this distribution. 372 :return: A set of "name (version)" strings. 373 """ 374 plist = self.metadata.provides 375 s = '%s (%s)' % (self.name, self.version) 376 if s not in plist: 377 plist.append(s) 378 return plist 379 380 def _get_requirements(self, req_attr): 381 md = self.metadata 382 logger.debug('Getting requirements from metadata %r', md.todict()) 383 reqts = getattr(md, req_attr) 384 return set(md.get_requirements(reqts, extras=self.extras, 385 env=self.context)) 386 387 @property 388 def run_requires(self): 389 return self._get_requirements('run_requires') 390 391 @property 392 def meta_requires(self): 393 return self._get_requirements('meta_requires') 394 395 @property 396 def build_requires(self): 397 return self._get_requirements('build_requires') 398 399 @property 400 def test_requires(self): 401 return self._get_requirements('test_requires') 402 403 @property 404 def dev_requires(self): 405 return self._get_requirements('dev_requires') 406 407 def matches_requirement(self, req): 408 """ 409 Say if this instance matches (fulfills) a requirement. 410 :param req: The requirement to match. 411 :rtype req: str 412 :return: True if it matches, else False. 413 """ 414 # Requirement may contain extras - parse to lose those 415 # from what's passed to the matcher 416 r = parse_requirement(req) 417 scheme = get_scheme(self.metadata.scheme) 418 try: 419 matcher = scheme.matcher(r.requirement) 420 except UnsupportedVersionError: 421 # XXX compat-mode if cannot read the version 422 logger.warning('could not read version %r - using name only', 423 req) 424 name = req.split()[0] 425 matcher = scheme.matcher(name) 426 427 name = matcher.key # case-insensitive 428 429 result = False 430 for p in self.provides: 431 p_name, p_ver = parse_name_and_version(p) 432 if p_name != name: 433 continue 434 try: 435 result = matcher.match(p_ver) 436 break 437 except UnsupportedVersionError: 438 pass 439 return result 440 441 def __repr__(self): 442 """ 443 Return a textual representation of this instance, 444 """ 445 if self.source_url: 446 suffix = ' [%s]' % self.source_url 447 else: 448 suffix = '' 449 return '<Distribution %s (%s)%s>' % (self.name, self.version, suffix) 450 451 def __eq__(self, other): 452 """ 453 See if this distribution is the same as another. 454 :param other: The distribution to compare with. To be equal to one 455 another. distributions must have the same type, name, 456 version and source_url. 457 :return: True if it is the same, else False. 458 """ 459 if type(other) is not type(self): 460 result = False 461 else: 462 result = (self.name == other.name and 463 self.version == other.version and 464 self.source_url == other.source_url) 465 return result 466 467 def __hash__(self): 468 """ 469 Compute hash in a way which matches the equality test. 470 """ 471 return hash(self.name) + hash(self.version) + hash(self.source_url) 472 473 474class BaseInstalledDistribution(Distribution): 475 """ 476 This is the base class for installed distributions (whether PEP 376 or 477 legacy). 478 """ 479 480 hasher = None 481 482 def __init__(self, metadata, path, env=None): 483 """ 484 Initialise an instance. 485 :param metadata: An instance of :class:`Metadata` which describes the 486 distribution. This will normally have been initialised 487 from a metadata file in the ``path``. 488 :param path: The path of the ``.dist-info`` or ``.egg-info`` 489 directory for the distribution. 490 :param env: This is normally the :class:`DistributionPath` 491 instance where this distribution was found. 492 """ 493 super(BaseInstalledDistribution, self).__init__(metadata) 494 self.path = path 495 self.dist_path = env 496 497 def get_hash(self, data, hasher=None): 498 """ 499 Get the hash of some data, using a particular hash algorithm, if 500 specified. 501 502 :param data: The data to be hashed. 503 :type data: bytes 504 :param hasher: The name of a hash implementation, supported by hashlib, 505 or ``None``. Examples of valid values are ``'sha1'``, 506 ``'sha224'``, ``'sha384'``, '``sha256'``, ``'md5'`` and 507 ``'sha512'``. If no hasher is specified, the ``hasher`` 508 attribute of the :class:`InstalledDistribution` instance 509 is used. If the hasher is determined to be ``None``, MD5 510 is used as the hashing algorithm. 511 :returns: The hash of the data. If a hasher was explicitly specified, 512 the returned hash will be prefixed with the specified hasher 513 followed by '='. 514 :rtype: str 515 """ 516 if hasher is None: 517 hasher = self.hasher 518 if hasher is None: 519 hasher = hashlib.md5 520 prefix = '' 521 else: 522 hasher = getattr(hashlib, hasher) 523 prefix = '%s=' % self.hasher 524 digest = hasher(data).digest() 525 digest = base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii') 526 return '%s%s' % (prefix, digest) 527 528 529class InstalledDistribution(BaseInstalledDistribution): 530 """ 531 Created with the *path* of the ``.dist-info`` directory provided to the 532 constructor. It reads the metadata contained in ``pydist.json`` when it is 533 instantiated., or uses a passed in Metadata instance (useful for when 534 dry-run mode is being used). 535 """ 536 537 hasher = 'sha256' 538 539 def __init__(self, path, metadata=None, env=None): 540 self.modules = [] 541 self.finder = finder = resources.finder_for_path(path) 542 if finder is None: 543 raise ValueError('finder unavailable for %s' % path) 544 if env and env._cache_enabled and path in env._cache.path: 545 metadata = env._cache.path[path].metadata 546 elif metadata is None: 547 r = finder.find(METADATA_FILENAME) 548 # Temporary - for Wheel 0.23 support 549 if r is None: 550 r = finder.find(WHEEL_METADATA_FILENAME) 551 # Temporary - for legacy support 552 if r is None: 553 r = finder.find(LEGACY_METADATA_FILENAME) 554 if r is None: 555 raise ValueError('no %s found in %s' % (METADATA_FILENAME, 556 path)) 557 with contextlib.closing(r.as_stream()) as stream: 558 metadata = Metadata(fileobj=stream, scheme='legacy') 559 560 super(InstalledDistribution, self).__init__(metadata, path, env) 561 562 if env and env._cache_enabled: 563 env._cache.add(self) 564 565 r = finder.find('REQUESTED') 566 self.requested = r is not None 567 p = os.path.join(path, 'top_level.txt') 568 if os.path.exists(p): 569 with open(p, 'rb') as f: 570 data = f.read().decode('utf-8') 571 self.modules = data.splitlines() 572 573 def __repr__(self): 574 return '<InstalledDistribution %r %s at %r>' % ( 575 self.name, self.version, self.path) 576 577 def __str__(self): 578 return "%s %s" % (self.name, self.version) 579 580 def _get_records(self): 581 """ 582 Get the list of installed files for the distribution 583 :return: A list of tuples of path, hash and size. Note that hash and 584 size might be ``None`` for some entries. The path is exactly 585 as stored in the file (which is as in PEP 376). 586 """ 587 results = [] 588 r = self.get_distinfo_resource('RECORD') 589 with contextlib.closing(r.as_stream()) as stream: 590 with CSVReader(stream=stream) as record_reader: 591 # Base location is parent dir of .dist-info dir 592 #base_location = os.path.dirname(self.path) 593 #base_location = os.path.abspath(base_location) 594 for row in record_reader: 595 missing = [None for i in range(len(row), 3)] 596 path, checksum, size = row + missing 597 #if not os.path.isabs(path): 598 # path = path.replace('/', os.sep) 599 # path = os.path.join(base_location, path) 600 results.append((path, checksum, size)) 601 return results 602 603 @cached_property 604 def exports(self): 605 """ 606 Return the information exported by this distribution. 607 :return: A dictionary of exports, mapping an export category to a dict 608 of :class:`ExportEntry` instances describing the individual 609 export entries, and keyed by name. 610 """ 611 result = {} 612 r = self.get_distinfo_resource(EXPORTS_FILENAME) 613 if r: 614 result = self.read_exports() 615 return result 616 617 def read_exports(self): 618 """ 619 Read exports data from a file in .ini format. 620 621 :return: A dictionary of exports, mapping an export category to a list 622 of :class:`ExportEntry` instances describing the individual 623 export entries. 624 """ 625 result = {} 626 r = self.get_distinfo_resource(EXPORTS_FILENAME) 627 if r: 628 with contextlib.closing(r.as_stream()) as stream: 629 result = read_exports(stream) 630 return result 631 632 def write_exports(self, exports): 633 """ 634 Write a dictionary of exports to a file in .ini format. 635 :param exports: A dictionary of exports, mapping an export category to 636 a list of :class:`ExportEntry` instances describing the 637 individual export entries. 638 """ 639 rf = self.get_distinfo_file(EXPORTS_FILENAME) 640 with open(rf, 'w') as f: 641 write_exports(exports, f) 642 643 def get_resource_path(self, relative_path): 644 """ 645 NOTE: This API may change in the future. 646 647 Return the absolute path to a resource file with the given relative 648 path. 649 650 :param relative_path: The path, relative to .dist-info, of the resource 651 of interest. 652 :return: The absolute path where the resource is to be found. 653 """ 654 r = self.get_distinfo_resource('RESOURCES') 655 with contextlib.closing(r.as_stream()) as stream: 656 with CSVReader(stream=stream) as resources_reader: 657 for relative, destination in resources_reader: 658 if relative == relative_path: 659 return destination 660 raise KeyError('no resource file with relative path %r ' 661 'is installed' % relative_path) 662 663 def list_installed_files(self): 664 """ 665 Iterates over the ``RECORD`` entries and returns a tuple 666 ``(path, hash, size)`` for each line. 667 668 :returns: iterator of (path, hash, size) 669 """ 670 for result in self._get_records(): 671 yield result 672 673 def write_installed_files(self, paths, prefix, dry_run=False): 674 """ 675 Writes the ``RECORD`` file, using the ``paths`` iterable passed in. Any 676 existing ``RECORD`` file is silently overwritten. 677 678 prefix is used to determine when to write absolute paths. 679 """ 680 prefix = os.path.join(prefix, '') 681 base = os.path.dirname(self.path) 682 base_under_prefix = base.startswith(prefix) 683 base = os.path.join(base, '') 684 record_path = self.get_distinfo_file('RECORD') 685 logger.info('creating %s', record_path) 686 if dry_run: 687 return None 688 with CSVWriter(record_path) as writer: 689 for path in paths: 690 if os.path.isdir(path) or path.endswith(('.pyc', '.pyo')): 691 # do not put size and hash, as in PEP-376 692 hash_value = size = '' 693 else: 694 size = '%d' % os.path.getsize(path) 695 with open(path, 'rb') as fp: 696 hash_value = self.get_hash(fp.read()) 697 if path.startswith(base) or (base_under_prefix and 698 path.startswith(prefix)): 699 path = os.path.relpath(path, base) 700 writer.writerow((path, hash_value, size)) 701 702 # add the RECORD file itself 703 if record_path.startswith(base): 704 record_path = os.path.relpath(record_path, base) 705 writer.writerow((record_path, '', '')) 706 return record_path 707 708 def check_installed_files(self): 709 """ 710 Checks that the hashes and sizes of the files in ``RECORD`` are 711 matched by the files themselves. Returns a (possibly empty) list of 712 mismatches. Each entry in the mismatch list will be a tuple consisting 713 of the path, 'exists', 'size' or 'hash' according to what didn't match 714 (existence is checked first, then size, then hash), the expected 715 value and the actual value. 716 """ 717 mismatches = [] 718 base = os.path.dirname(self.path) 719 record_path = self.get_distinfo_file('RECORD') 720 for path, hash_value, size in self.list_installed_files(): 721 if not os.path.isabs(path): 722 path = os.path.join(base, path) 723 if path == record_path: 724 continue 725 if not os.path.exists(path): 726 mismatches.append((path, 'exists', True, False)) 727 elif os.path.isfile(path): 728 actual_size = str(os.path.getsize(path)) 729 if size and actual_size != size: 730 mismatches.append((path, 'size', size, actual_size)) 731 elif hash_value: 732 if '=' in hash_value: 733 hasher = hash_value.split('=', 1)[0] 734 else: 735 hasher = None 736 737 with open(path, 'rb') as f: 738 actual_hash = self.get_hash(f.read(), hasher) 739 if actual_hash != hash_value: 740 mismatches.append((path, 'hash', hash_value, actual_hash)) 741 return mismatches 742 743 @cached_property 744 def shared_locations(self): 745 """ 746 A dictionary of shared locations whose keys are in the set 'prefix', 747 'purelib', 'platlib', 'scripts', 'headers', 'data' and 'namespace'. 748 The corresponding value is the absolute path of that category for 749 this distribution, and takes into account any paths selected by the 750 user at installation time (e.g. via command-line arguments). In the 751 case of the 'namespace' key, this would be a list of absolute paths 752 for the roots of namespace packages in this distribution. 753 754 The first time this property is accessed, the relevant information is 755 read from the SHARED file in the .dist-info directory. 756 """ 757 result = {} 758 shared_path = os.path.join(self.path, 'SHARED') 759 if os.path.isfile(shared_path): 760 with codecs.open(shared_path, 'r', encoding='utf-8') as f: 761 lines = f.read().splitlines() 762 for line in lines: 763 key, value = line.split('=', 1) 764 if key == 'namespace': 765 result.setdefault(key, []).append(value) 766 else: 767 result[key] = value 768 return result 769 770 def write_shared_locations(self, paths, dry_run=False): 771 """ 772 Write shared location information to the SHARED file in .dist-info. 773 :param paths: A dictionary as described in the documentation for 774 :meth:`shared_locations`. 775 :param dry_run: If True, the action is logged but no file is actually 776 written. 777 :return: The path of the file written to. 778 """ 779 shared_path = os.path.join(self.path, 'SHARED') 780 logger.info('creating %s', shared_path) 781 if dry_run: 782 return None 783 lines = [] 784 for key in ('prefix', 'lib', 'headers', 'scripts', 'data'): 785 path = paths[key] 786 if os.path.isdir(paths[key]): 787 lines.append('%s=%s' % (key, path)) 788 for ns in paths.get('namespace', ()): 789 lines.append('namespace=%s' % ns) 790 791 with codecs.open(shared_path, 'w', encoding='utf-8') as f: 792 f.write('\n'.join(lines)) 793 return shared_path 794 795 def get_distinfo_resource(self, path): 796 if path not in DIST_FILES: 797 raise DistlibException('invalid path for a dist-info file: ' 798 '%r at %r' % (path, self.path)) 799 finder = resources.finder_for_path(self.path) 800 if finder is None: 801 raise DistlibException('Unable to get a finder for %s' % self.path) 802 return finder.find(path) 803 804 def get_distinfo_file(self, path): 805 """ 806 Returns a path located under the ``.dist-info`` directory. Returns a 807 string representing the path. 808 809 :parameter path: a ``'/'``-separated path relative to the 810 ``.dist-info`` directory or an absolute path; 811 If *path* is an absolute path and doesn't start 812 with the ``.dist-info`` directory path, 813 a :class:`DistlibException` is raised 814 :type path: str 815 :rtype: str 816 """ 817 # Check if it is an absolute path # XXX use relpath, add tests 818 if path.find(os.sep) >= 0: 819 # it's an absolute path? 820 distinfo_dirname, path = path.split(os.sep)[-2:] 821 if distinfo_dirname != self.path.split(os.sep)[-1]: 822 raise DistlibException( 823 'dist-info file %r does not belong to the %r %s ' 824 'distribution' % (path, self.name, self.version)) 825 826 # The file must be relative 827 if path not in DIST_FILES: 828 raise DistlibException('invalid path for a dist-info file: ' 829 '%r at %r' % (path, self.path)) 830 831 return os.path.join(self.path, path) 832 833 def list_distinfo_files(self): 834 """ 835 Iterates over the ``RECORD`` entries and returns paths for each line if 836 the path is pointing to a file located in the ``.dist-info`` directory 837 or one of its subdirectories. 838 839 :returns: iterator of paths 840 """ 841 base = os.path.dirname(self.path) 842 for path, checksum, size in self._get_records(): 843 # XXX add separator or use real relpath algo 844 if not os.path.isabs(path): 845 path = os.path.join(base, path) 846 if path.startswith(self.path): 847 yield path 848 849 def __eq__(self, other): 850 return (isinstance(other, InstalledDistribution) and 851 self.path == other.path) 852 853 # See http://docs.python.org/reference/datamodel#object.__hash__ 854 __hash__ = object.__hash__ 855 856 857class EggInfoDistribution(BaseInstalledDistribution): 858 """Created with the *path* of the ``.egg-info`` directory or file provided 859 to the constructor. It reads the metadata contained in the file itself, or 860 if the given path happens to be a directory, the metadata is read from the 861 file ``PKG-INFO`` under that directory.""" 862 863 requested = True # as we have no way of knowing, assume it was 864 shared_locations = {} 865 866 def __init__(self, path, env=None): 867 def set_name_and_version(s, n, v): 868 s.name = n 869 s.key = n.lower() # for case-insensitive comparisons 870 s.version = v 871 872 self.path = path 873 self.dist_path = env 874 if env and env._cache_enabled and path in env._cache_egg.path: 875 metadata = env._cache_egg.path[path].metadata 876 set_name_and_version(self, metadata.name, metadata.version) 877 else: 878 metadata = self._get_metadata(path) 879 880 # Need to be set before caching 881 set_name_and_version(self, metadata.name, metadata.version) 882 883 if env and env._cache_enabled: 884 env._cache_egg.add(self) 885 super(EggInfoDistribution, self).__init__(metadata, path, env) 886 887 def _get_metadata(self, path): 888 requires = None 889 890 def parse_requires_data(data): 891 """Create a list of dependencies from a requires.txt file. 892 893 *data*: the contents of a setuptools-produced requires.txt file. 894 """ 895 reqs = [] 896 lines = data.splitlines() 897 for line in lines: 898 line = line.strip() 899 if line.startswith('['): 900 logger.warning('Unexpected line: quitting requirement scan: %r', 901 line) 902 break 903 r = parse_requirement(line) 904 if not r: 905 logger.warning('Not recognised as a requirement: %r', line) 906 continue 907 if r.extras: 908 logger.warning('extra requirements in requires.txt are ' 909 'not supported') 910 if not r.constraints: 911 reqs.append(r.name) 912 else: 913 cons = ', '.join('%s%s' % c for c in r.constraints) 914 reqs.append('%s (%s)' % (r.name, cons)) 915 return reqs 916 917 def parse_requires_path(req_path): 918 """Create a list of dependencies from a requires.txt file. 919 920 *req_path*: the path to a setuptools-produced requires.txt file. 921 """ 922 923 reqs = [] 924 try: 925 with codecs.open(req_path, 'r', 'utf-8') as fp: 926 reqs = parse_requires_data(fp.read()) 927 except IOError: 928 pass 929 return reqs 930 931 tl_path = tl_data = None 932 if path.endswith('.egg'): 933 if os.path.isdir(path): 934 p = os.path.join(path, 'EGG-INFO') 935 meta_path = os.path.join(p, 'PKG-INFO') 936 metadata = Metadata(path=meta_path, scheme='legacy') 937 req_path = os.path.join(p, 'requires.txt') 938 tl_path = os.path.join(p, 'top_level.txt') 939 requires = parse_requires_path(req_path) 940 else: 941 # FIXME handle the case where zipfile is not available 942 zipf = zipimport.zipimporter(path) 943 fileobj = StringIO( 944 zipf.get_data('EGG-INFO/PKG-INFO').decode('utf8')) 945 metadata = Metadata(fileobj=fileobj, scheme='legacy') 946 try: 947 data = zipf.get_data('EGG-INFO/requires.txt') 948 tl_data = zipf.get_data('EGG-INFO/top_level.txt').decode('utf-8') 949 requires = parse_requires_data(data.decode('utf-8')) 950 except IOError: 951 requires = None 952 elif path.endswith('.egg-info'): 953 if os.path.isdir(path): 954 req_path = os.path.join(path, 'requires.txt') 955 requires = parse_requires_path(req_path) 956 path = os.path.join(path, 'PKG-INFO') 957 tl_path = os.path.join(path, 'top_level.txt') 958 metadata = Metadata(path=path, scheme='legacy') 959 else: 960 raise DistlibException('path must end with .egg-info or .egg, ' 961 'got %r' % path) 962 963 if requires: 964 metadata.add_requirements(requires) 965 # look for top-level modules in top_level.txt, if present 966 if tl_data is None: 967 if tl_path is not None and os.path.exists(tl_path): 968 with open(tl_path, 'rb') as f: 969 tl_data = f.read().decode('utf-8') 970 if not tl_data: 971 tl_data = [] 972 else: 973 tl_data = tl_data.splitlines() 974 self.modules = tl_data 975 return metadata 976 977 def __repr__(self): 978 return '<EggInfoDistribution %r %s at %r>' % ( 979 self.name, self.version, self.path) 980 981 def __str__(self): 982 return "%s %s" % (self.name, self.version) 983 984 def check_installed_files(self): 985 """ 986 Checks that the hashes and sizes of the files in ``RECORD`` are 987 matched by the files themselves. Returns a (possibly empty) list of 988 mismatches. Each entry in the mismatch list will be a tuple consisting 989 of the path, 'exists', 'size' or 'hash' according to what didn't match 990 (existence is checked first, then size, then hash), the expected 991 value and the actual value. 992 """ 993 mismatches = [] 994 record_path = os.path.join(self.path, 'installed-files.txt') 995 if os.path.exists(record_path): 996 for path, _, _ in self.list_installed_files(): 997 if path == record_path: 998 continue 999 if not os.path.exists(path): 1000 mismatches.append((path, 'exists', True, False)) 1001 return mismatches 1002 1003 def list_installed_files(self): 1004 """ 1005 Iterates over the ``installed-files.txt`` entries and returns a tuple 1006 ``(path, hash, size)`` for each line. 1007 1008 :returns: a list of (path, hash, size) 1009 """ 1010 1011 def _md5(path): 1012 f = open(path, 'rb') 1013 try: 1014 content = f.read() 1015 finally: 1016 f.close() 1017 return hashlib.md5(content).hexdigest() 1018 1019 def _size(path): 1020 return os.stat(path).st_size 1021 1022 record_path = os.path.join(self.path, 'installed-files.txt') 1023 result = [] 1024 if os.path.exists(record_path): 1025 with codecs.open(record_path, 'r', encoding='utf-8') as f: 1026 for line in f: 1027 line = line.strip() 1028 p = os.path.normpath(os.path.join(self.path, line)) 1029 # "./" is present as a marker between installed files 1030 # and installation metadata files 1031 if not os.path.exists(p): 1032 logger.warning('Non-existent file: %s', p) 1033 if p.endswith(('.pyc', '.pyo')): 1034 continue 1035 #otherwise fall through and fail 1036 if not os.path.isdir(p): 1037 result.append((p, _md5(p), _size(p))) 1038 result.append((record_path, None, None)) 1039 return result 1040 1041 def list_distinfo_files(self, absolute=False): 1042 """ 1043 Iterates over the ``installed-files.txt`` entries and returns paths for 1044 each line if the path is pointing to a file located in the 1045 ``.egg-info`` directory or one of its subdirectories. 1046 1047 :parameter absolute: If *absolute* is ``True``, each returned path is 1048 transformed into a local absolute path. Otherwise the 1049 raw value from ``installed-files.txt`` is returned. 1050 :type absolute: boolean 1051 :returns: iterator of paths 1052 """ 1053 record_path = os.path.join(self.path, 'installed-files.txt') 1054 if os.path.exists(record_path): 1055 skip = True 1056 with codecs.open(record_path, 'r', encoding='utf-8') as f: 1057 for line in f: 1058 line = line.strip() 1059 if line == './': 1060 skip = False 1061 continue 1062 if not skip: 1063 p = os.path.normpath(os.path.join(self.path, line)) 1064 if p.startswith(self.path): 1065 if absolute: 1066 yield p 1067 else: 1068 yield line 1069 1070 def __eq__(self, other): 1071 return (isinstance(other, EggInfoDistribution) and 1072 self.path == other.path) 1073 1074 # See http://docs.python.org/reference/datamodel#object.__hash__ 1075 __hash__ = object.__hash__ 1076 1077new_dist_class = InstalledDistribution 1078old_dist_class = EggInfoDistribution 1079 1080 1081class DependencyGraph(object): 1082 """ 1083 Represents a dependency graph between distributions. 1084 1085 The dependency relationships are stored in an ``adjacency_list`` that maps 1086 distributions to a list of ``(other, label)`` tuples where ``other`` 1087 is a distribution and the edge is labeled with ``label`` (i.e. the version 1088 specifier, if such was provided). Also, for more efficient traversal, for 1089 every distribution ``x``, a list of predecessors is kept in 1090 ``reverse_list[x]``. An edge from distribution ``a`` to 1091 distribution ``b`` means that ``a`` depends on ``b``. If any missing 1092 dependencies are found, they are stored in ``missing``, which is a 1093 dictionary that maps distributions to a list of requirements that were not 1094 provided by any other distributions. 1095 """ 1096 1097 def __init__(self): 1098 self.adjacency_list = {} 1099 self.reverse_list = {} 1100 self.missing = {} 1101 1102 def add_distribution(self, distribution): 1103 """Add the *distribution* to the graph. 1104 1105 :type distribution: :class:`distutils2.database.InstalledDistribution` 1106 or :class:`distutils2.database.EggInfoDistribution` 1107 """ 1108 self.adjacency_list[distribution] = [] 1109 self.reverse_list[distribution] = [] 1110 #self.missing[distribution] = [] 1111 1112 def add_edge(self, x, y, label=None): 1113 """Add an edge from distribution *x* to distribution *y* with the given 1114 *label*. 1115 1116 :type x: :class:`distutils2.database.InstalledDistribution` or 1117 :class:`distutils2.database.EggInfoDistribution` 1118 :type y: :class:`distutils2.database.InstalledDistribution` or 1119 :class:`distutils2.database.EggInfoDistribution` 1120 :type label: ``str`` or ``None`` 1121 """ 1122 self.adjacency_list[x].append((y, label)) 1123 # multiple edges are allowed, so be careful 1124 if x not in self.reverse_list[y]: 1125 self.reverse_list[y].append(x) 1126 1127 def add_missing(self, distribution, requirement): 1128 """ 1129 Add a missing *requirement* for the given *distribution*. 1130 1131 :type distribution: :class:`distutils2.database.InstalledDistribution` 1132 or :class:`distutils2.database.EggInfoDistribution` 1133 :type requirement: ``str`` 1134 """ 1135 logger.debug('%s missing %r', distribution, requirement) 1136 self.missing.setdefault(distribution, []).append(requirement) 1137 1138 def _repr_dist(self, dist): 1139 return '%s %s' % (dist.name, dist.version) 1140 1141 def repr_node(self, dist, level=1): 1142 """Prints only a subgraph""" 1143 output = [self._repr_dist(dist)] 1144 for other, label in self.adjacency_list[dist]: 1145 dist = self._repr_dist(other) 1146 if label is not None: 1147 dist = '%s [%s]' % (dist, label) 1148 output.append(' ' * level + str(dist)) 1149 suboutput = self.repr_node(other, level + 1) 1150 subs = suboutput.split('\n') 1151 output.extend(subs[1:]) 1152 return '\n'.join(output) 1153 1154 def to_dot(self, f, skip_disconnected=True): 1155 """Writes a DOT output for the graph to the provided file *f*. 1156 1157 If *skip_disconnected* is set to ``True``, then all distributions 1158 that are not dependent on any other distribution are skipped. 1159 1160 :type f: has to support ``file``-like operations 1161 :type skip_disconnected: ``bool`` 1162 """ 1163 disconnected = [] 1164 1165 f.write("digraph dependencies {\n") 1166 for dist, adjs in self.adjacency_list.items(): 1167 if len(adjs) == 0 and not skip_disconnected: 1168 disconnected.append(dist) 1169 for other, label in adjs: 1170 if not label is None: 1171 f.write('"%s" -> "%s" [label="%s"]\n' % 1172 (dist.name, other.name, label)) 1173 else: 1174 f.write('"%s" -> "%s"\n' % (dist.name, other.name)) 1175 if not skip_disconnected and len(disconnected) > 0: 1176 f.write('subgraph disconnected {\n') 1177 f.write('label = "Disconnected"\n') 1178 f.write('bgcolor = red\n') 1179 1180 for dist in disconnected: 1181 f.write('"%s"' % dist.name) 1182 f.write('\n') 1183 f.write('}\n') 1184 f.write('}\n') 1185 1186 def topological_sort(self): 1187 """ 1188 Perform a topological sort of the graph. 1189 :return: A tuple, the first element of which is a topologically sorted 1190 list of distributions, and the second element of which is a 1191 list of distributions that cannot be sorted because they have 1192 circular dependencies and so form a cycle. 1193 """ 1194 result = [] 1195 # Make a shallow copy of the adjacency list 1196 alist = {} 1197 for k, v in self.adjacency_list.items(): 1198 alist[k] = v[:] 1199 while True: 1200 # See what we can remove in this run 1201 to_remove = [] 1202 for k, v in list(alist.items())[:]: 1203 if not v: 1204 to_remove.append(k) 1205 del alist[k] 1206 if not to_remove: 1207 # What's left in alist (if anything) is a cycle. 1208 break 1209 # Remove from the adjacency list of others 1210 for k, v in alist.items(): 1211 alist[k] = [(d, r) for d, r in v if d not in to_remove] 1212 logger.debug('Moving to result: %s', 1213 ['%s (%s)' % (d.name, d.version) for d in to_remove]) 1214 result.extend(to_remove) 1215 return result, list(alist.keys()) 1216 1217 def __repr__(self): 1218 """Representation of the graph""" 1219 output = [] 1220 for dist, adjs in self.adjacency_list.items(): 1221 output.append(self.repr_node(dist)) 1222 return '\n'.join(output) 1223 1224 1225def make_graph(dists, scheme='default'): 1226 """Makes a dependency graph from the given distributions. 1227 1228 :parameter dists: a list of distributions 1229 :type dists: list of :class:`distutils2.database.InstalledDistribution` and 1230 :class:`distutils2.database.EggInfoDistribution` instances 1231 :rtype: a :class:`DependencyGraph` instance 1232 """ 1233 scheme = get_scheme(scheme) 1234 graph = DependencyGraph() 1235 provided = {} # maps names to lists of (version, dist) tuples 1236 1237 # first, build the graph and find out what's provided 1238 for dist in dists: 1239 graph.add_distribution(dist) 1240 1241 for p in dist.provides: 1242 name, version = parse_name_and_version(p) 1243 logger.debug('Add to provided: %s, %s, %s', name, version, dist) 1244 provided.setdefault(name, []).append((version, dist)) 1245 1246 # now make the edges 1247 for dist in dists: 1248 requires = (dist.run_requires | dist.meta_requires | 1249 dist.build_requires | dist.dev_requires) 1250 for req in requires: 1251 try: 1252 matcher = scheme.matcher(req) 1253 except UnsupportedVersionError: 1254 # XXX compat-mode if cannot read the version 1255 logger.warning('could not read version %r - using name only', 1256 req) 1257 name = req.split()[0] 1258 matcher = scheme.matcher(name) 1259 1260 name = matcher.key # case-insensitive 1261 1262 matched = False 1263 if name in provided: 1264 for version, provider in provided[name]: 1265 try: 1266 match = matcher.match(version) 1267 except UnsupportedVersionError: 1268 match = False 1269 1270 if match: 1271 graph.add_edge(dist, provider, req) 1272 matched = True 1273 break 1274 if not matched: 1275 graph.add_missing(dist, req) 1276 return graph 1277 1278 1279def get_dependent_dists(dists, dist): 1280 """Recursively generate a list of distributions from *dists* that are 1281 dependent on *dist*. 1282 1283 :param dists: a list of distributions 1284 :param dist: a distribution, member of *dists* for which we are interested 1285 """ 1286 if dist not in dists: 1287 raise DistlibException('given distribution %r is not a member ' 1288 'of the list' % dist.name) 1289 graph = make_graph(dists) 1290 1291 dep = [dist] # dependent distributions 1292 todo = graph.reverse_list[dist] # list of nodes we should inspect 1293 1294 while todo: 1295 d = todo.pop() 1296 dep.append(d) 1297 for succ in graph.reverse_list[d]: 1298 if succ not in dep: 1299 todo.append(succ) 1300 1301 dep.pop(0) # remove dist from dep, was there to prevent infinite loops 1302 return dep 1303 1304 1305def get_required_dists(dists, dist): 1306 """Recursively generate a list of distributions from *dists* that are 1307 required by *dist*. 1308 1309 :param dists: a list of distributions 1310 :param dist: a distribution, member of *dists* for which we are interested 1311 """ 1312 if dist not in dists: 1313 raise DistlibException('given distribution %r is not a member ' 1314 'of the list' % dist.name) 1315 graph = make_graph(dists) 1316 1317 req = [] # required distributions 1318 todo = graph.adjacency_list[dist] # list of nodes we should inspect 1319 1320 while todo: 1321 d = todo.pop()[0] 1322 req.append(d) 1323 for pred in graph.adjacency_list[d]: 1324 if pred not in req: 1325 todo.append(pred) 1326 1327 return req 1328 1329 1330def make_dist(name, version, **kwargs): 1331 """ 1332 A convenience method for making a dist given just a name and version. 1333 """ 1334 summary = kwargs.pop('summary', 'Placeholder for summary') 1335 md = Metadata(**kwargs) 1336 md.name = name 1337 md.version = version 1338 md.summary = summary or 'Placeholder for summary' 1339 return Distribution(md) 1340