1# -*- test-case-name: twisted.test.test_modules -*- 2# Copyright (c) Twisted Matrix Laboratories. 3# See LICENSE for details. 4 5""" 6This module aims to provide a unified, object-oriented view of Python's 7runtime hierarchy. 8 9Python is a very dynamic language with wide variety of introspection utilities. 10However, these utilities can be hard to use, because there is no consistent 11API. The introspection API in python is made up of attributes (__name__, 12__module__, func_name, etc) on instances, modules, classes and functions which 13vary between those four types, utility modules such as 'inspect' which provide 14some functionality, the 'imp' module, the "compiler" module, the semantics of 15PEP 302 support, and setuptools, among other things. 16 17At the top, you have "PythonPath", an abstract representation of sys.path which 18includes methods to locate top-level modules, with or without loading them. 19The top-level exposed functions in this module for accessing the system path 20are "walkModules", "iterModules", and "getModule". 21 22From most to least specific, here are the objects provided:: 23 24 PythonPath # sys.path 25 | 26 v 27 PathEntry # one entry on sys.path: an importer 28 | 29 v 30 PythonModule # a module or package that can be loaded 31 | 32 v 33 PythonAttribute # an attribute of a module (function or class) 34 | 35 v 36 PythonAttribute # an attribute of a function or class 37 | 38 v 39 ... 40 41Here's an example of idiomatic usage: this is what you would do to list all of 42the modules outside the standard library's python-files directory:: 43 44 import os 45 stdlibdir = os.path.dirname(os.__file__) 46 47 from twisted.python.modules import iterModules 48 49 for modinfo in iterModules(): 50 if (modinfo.pathEntry.filePath.path != stdlibdir 51 and not modinfo.isPackage()): 52 print('unpackaged: %s: %s' % ( 53 modinfo.name, modinfo.filePath.path)) 54 55@var theSystemPath: The very top of the Python object space. 56@type theSystemPath: L{PythonPath} 57""" 58 59 60import inspect 61import sys 62import warnings 63import zipimport 64 65# let's try to keep path imports to a minimum... 66from os.path import dirname, split as splitpath 67from typing import cast 68 69from zope.interface import Interface, implementer 70 71from twisted.python.compat import nativeString 72from twisted.python.components import registerAdapter 73from twisted.python.filepath import FilePath, UnlistableError 74from twisted.python.reflect import namedAny 75from twisted.python.zippath import ZipArchive 76 77_nothing = object() 78 79PYTHON_EXTENSIONS = [".py"] 80OPTIMIZED_MODE = __doc__ is None 81if OPTIMIZED_MODE: 82 PYTHON_EXTENSIONS.append(".pyo") 83else: 84 PYTHON_EXTENSIONS.append(".pyc") 85 86 87def _isPythonIdentifier(string): 88 """ 89 cheezy fake test for proper identifier-ness. 90 91 @param string: a L{str} which might or might not be a valid python 92 identifier. 93 @return: True or False 94 """ 95 textString = nativeString(string) 96 return " " not in textString and "." not in textString and "-" not in textString 97 98 99def _isPackagePath(fpath): 100 # Determine if a FilePath-like object is a Python package. TODO: deal with 101 # __init__module.(so|dll|pyd)? 102 extless = fpath.splitext()[0] 103 basend = splitpath(extless)[1] 104 return basend == "__init__" 105 106 107class _ModuleIteratorHelper: 108 """ 109 This mixin provides common behavior between python module and path entries, 110 since the mechanism for searching sys.path and __path__ attributes is 111 remarkably similar. 112 """ 113 114 def iterModules(self): 115 """ 116 Loop over the modules present below this entry or package on PYTHONPATH. 117 118 For modules which are not packages, this will yield nothing. 119 120 For packages and path entries, this will only yield modules one level 121 down; i.e. if there is a package a.b.c, iterModules on a will only 122 return a.b. If you want to descend deeply, use walkModules. 123 124 @return: a generator which yields PythonModule instances that describe 125 modules which can be, or have been, imported. 126 """ 127 yielded = {} 128 if not self.filePath.exists(): 129 return 130 131 for placeToLook in self._packagePaths(): 132 try: 133 children = sorted(placeToLook.children()) 134 except UnlistableError: 135 continue 136 137 for potentialTopLevel in children: 138 ext = potentialTopLevel.splitext()[1] 139 potentialBasename = potentialTopLevel.basename()[: -len(ext)] 140 if ext in PYTHON_EXTENSIONS: 141 # TODO: this should be a little choosier about which path entry 142 # it selects first, and it should do all the .so checking and 143 # crud 144 if not _isPythonIdentifier(potentialBasename): 145 continue 146 modname = self._subModuleName(potentialBasename) 147 if modname.split(".")[-1] == "__init__": 148 # This marks the directory as a package so it can't be 149 # a module. 150 continue 151 if modname not in yielded: 152 yielded[modname] = True 153 pm = PythonModule(modname, potentialTopLevel, self._getEntry()) 154 assert pm != self 155 yield pm 156 else: 157 if ( 158 ext 159 or not _isPythonIdentifier(potentialBasename) 160 or not potentialTopLevel.isdir() 161 ): 162 continue 163 modname = self._subModuleName(potentialTopLevel.basename()) 164 for ext in PYTHON_EXTENSIONS: 165 initpy = potentialTopLevel.child("__init__" + ext) 166 if initpy.exists() and modname not in yielded: 167 yielded[modname] = True 168 pm = PythonModule(modname, initpy, self._getEntry()) 169 assert pm != self 170 yield pm 171 break 172 173 def walkModules(self, importPackages=False): 174 """ 175 Similar to L{iterModules}, this yields self, and then every module in my 176 package or entry, and every submodule in each package or entry. 177 178 In other words, this is deep, and L{iterModules} is shallow. 179 """ 180 yield self 181 for package in self.iterModules(): 182 yield from package.walkModules(importPackages=importPackages) 183 184 def _subModuleName(self, mn): 185 """ 186 This is a hook to provide packages with the ability to specify their names 187 as a prefix to submodules here. 188 """ 189 return mn 190 191 def _packagePaths(self): 192 """ 193 Implement in subclasses to specify where to look for modules. 194 195 @return: iterable of FilePath-like objects. 196 """ 197 raise NotImplementedError() 198 199 def _getEntry(self): 200 """ 201 Implement in subclasses to specify what path entry submodules will come 202 from. 203 204 @return: a PathEntry instance. 205 """ 206 raise NotImplementedError() 207 208 def __getitem__(self, modname): 209 """ 210 Retrieve a module from below this path or package. 211 212 @param modname: a str naming a module to be loaded. For entries, this 213 is a top-level, undotted package name, and for packages it is the name 214 of the module without the package prefix. For example, if you have a 215 PythonModule representing the 'twisted' package, you could use:: 216 217 twistedPackageObj['python']['modules'] 218 219 to retrieve this module. 220 221 @raise KeyError: if the module is not found. 222 223 @return: a PythonModule. 224 """ 225 for module in self.iterModules(): 226 if module.name == self._subModuleName(modname): 227 return module 228 raise KeyError(modname) 229 230 def __iter__(self): 231 """ 232 Implemented to raise NotImplementedError for clarity, so that attempting to 233 loop over this object won't call __getitem__. 234 235 Note: in the future there might be some sensible default for iteration, 236 like 'walkEverything', so this is deliberately untested and undefined 237 behavior. 238 """ 239 raise NotImplementedError() 240 241 242class PythonAttribute: 243 """ 244 I represent a function, class, or other object that is present. 245 246 @ivar name: the fully-qualified python name of this attribute. 247 248 @ivar onObject: a reference to a PythonModule or other PythonAttribute that 249 is this attribute's logical parent. 250 251 @ivar name: the fully qualified python name of the attribute represented by 252 this class. 253 """ 254 255 def __init__(self, name, onObject, loaded, pythonValue): 256 """ 257 Create a PythonAttribute. This is a private constructor. Do not construct 258 me directly, use PythonModule.iterAttributes. 259 260 @param name: the FQPN 261 @param onObject: see ivar 262 @param loaded: always True, for now 263 @param pythonValue: the value of the attribute we're pointing to. 264 """ 265 self.name = name 266 self.onObject = onObject 267 self._loaded = loaded 268 self.pythonValue = pythonValue 269 270 def __repr__(self) -> str: 271 return f"PythonAttribute<{self.name!r}>" 272 273 def isLoaded(self): 274 """ 275 Return a boolean describing whether the attribute this describes has 276 actually been loaded into memory by importing its module. 277 278 Note: this currently always returns true; there is no Python parser 279 support in this module yet. 280 """ 281 return self._loaded 282 283 def load(self, default=_nothing): 284 """ 285 Load the value associated with this attribute. 286 287 @return: an arbitrary Python object, or 'default' if there is an error 288 loading it. 289 """ 290 return self.pythonValue 291 292 def iterAttributes(self): 293 for name, val in inspect.getmembers(self.load()): 294 yield PythonAttribute(self.name + "." + name, self, True, val) 295 296 297class PythonModule(_ModuleIteratorHelper): 298 """ 299 Representation of a module which could be imported from sys.path. 300 301 @ivar name: the fully qualified python name of this module. 302 303 @ivar filePath: a FilePath-like object which points to the location of this 304 module. 305 306 @ivar pathEntry: a L{PathEntry} instance which this module was located 307 from. 308 """ 309 310 def __init__(self, name, filePath, pathEntry): 311 """ 312 Create a PythonModule. Do not construct this directly, instead inspect a 313 PythonPath or other PythonModule instances. 314 315 @param name: see ivar 316 @param filePath: see ivar 317 @param pathEntry: see ivar 318 """ 319 _name = nativeString(name) 320 assert not _name.endswith(".__init__") 321 self.name = _name 322 self.filePath = filePath 323 self.parentPath = filePath.parent() 324 self.pathEntry = pathEntry 325 326 def _getEntry(self): 327 return self.pathEntry 328 329 def __repr__(self) -> str: 330 """ 331 Return a string representation including the module name. 332 """ 333 return f"PythonModule<{self.name!r}>" 334 335 def isLoaded(self): 336 """ 337 Determine if the module is loaded into sys.modules. 338 339 @return: a boolean: true if loaded, false if not. 340 """ 341 return self.pathEntry.pythonPath.moduleDict.get(self.name) is not None 342 343 def iterAttributes(self): 344 """ 345 List all the attributes defined in this module. 346 347 Note: Future work is planned here to make it possible to list python 348 attributes on a module without loading the module by inspecting ASTs or 349 bytecode, but currently any iteration of PythonModule objects insists 350 they must be loaded, and will use inspect.getmodule. 351 352 @raise NotImplementedError: if this module is not loaded. 353 354 @return: a generator yielding PythonAttribute instances describing the 355 attributes of this module. 356 """ 357 if not self.isLoaded(): 358 raise NotImplementedError( 359 "You can't load attributes from non-loaded modules yet." 360 ) 361 for name, val in inspect.getmembers(self.load()): 362 yield PythonAttribute(self.name + "." + name, self, True, val) 363 364 def isPackage(self): 365 """ 366 Returns true if this module is also a package, and might yield something 367 from iterModules. 368 """ 369 return _isPackagePath(self.filePath) 370 371 def load(self, default=_nothing): 372 """ 373 Load this module. 374 375 @param default: if specified, the value to return in case of an error. 376 377 @return: a genuine python module. 378 379 @raise Exception: Importing modules is a risky business; 380 the erorrs of any code run at module scope may be raised from here, as 381 well as ImportError if something bizarre happened to the system path 382 between the discovery of this PythonModule object and the attempt to 383 import it. If you specify a default, the error will be swallowed 384 entirely, and not logged. 385 386 @rtype: types.ModuleType. 387 """ 388 try: 389 return self.pathEntry.pythonPath.moduleLoader(self.name) 390 except BaseException: # this needs more thought... 391 if default is not _nothing: 392 return default 393 raise 394 395 def __eq__(self, other: object) -> bool: 396 """ 397 PythonModules with the same name are equal. 398 """ 399 if isinstance(other, PythonModule): 400 return cast(bool, other.name == self.name) 401 return NotImplemented 402 403 def walkModules(self, importPackages=False): 404 if importPackages and self.isPackage(): 405 self.load() 406 return super().walkModules(importPackages=importPackages) 407 408 def _subModuleName(self, mn): 409 """ 410 submodules of this module are prefixed with our name. 411 """ 412 return self.name + "." + mn 413 414 def _packagePaths(self): 415 """ 416 Yield a sequence of FilePath-like objects which represent path segments. 417 """ 418 if not self.isPackage(): 419 return 420 if self.isLoaded(): 421 load = self.load() 422 if hasattr(load, "__path__"): 423 for fn in load.__path__: 424 if fn == self.parentPath.path: 425 # this should _really_ exist. 426 assert self.parentPath.exists() 427 yield self.parentPath 428 else: 429 smp = self.pathEntry.pythonPath._smartPath(fn) 430 if smp.exists(): 431 yield smp 432 else: 433 yield self.parentPath 434 435 436class PathEntry(_ModuleIteratorHelper): 437 """ 438 I am a proxy for a single entry on sys.path. 439 440 @ivar filePath: a FilePath-like object pointing at the filesystem location 441 or archive file where this path entry is stored. 442 443 @ivar pythonPath: a PythonPath instance. 444 """ 445 446 def __init__(self, filePath, pythonPath): 447 """ 448 Create a PathEntry. This is a private constructor. 449 """ 450 self.filePath = filePath 451 self.pythonPath = pythonPath 452 453 def _getEntry(self): 454 return self 455 456 def __repr__(self) -> str: 457 return f"PathEntry<{self.filePath!r}>" 458 459 def _packagePaths(self): 460 yield self.filePath 461 462 463class IPathImportMapper(Interface): 464 """ 465 This is an internal interface, used to map importers to factories for 466 FilePath-like objects. 467 """ 468 469 def mapPath(pathLikeString): 470 """ 471 Return a FilePath-like object. 472 473 @param pathLikeString: a path-like string, like one that might be 474 passed to an import hook. 475 476 @return: a L{FilePath}, or something like it (currently only a 477 L{ZipPath}, but more might be added later). 478 """ 479 480 481@implementer(IPathImportMapper) 482class _DefaultMapImpl: 483 """Wrapper for the default importer, i.e. None.""" 484 485 def mapPath(self, fsPathString): 486 return FilePath(fsPathString) 487 488 489_theDefaultMapper = _DefaultMapImpl() 490 491 492@implementer(IPathImportMapper) 493class _ZipMapImpl: 494 """IPathImportMapper implementation for zipimport.ZipImporter.""" 495 496 def __init__(self, importer): 497 self.importer = importer 498 499 def mapPath(self, fsPathString): 500 """ 501 Map the given FS path to a ZipPath, by looking at the ZipImporter's 502 "archive" attribute and using it as our ZipArchive root, then walking 503 down into the archive from there. 504 505 @return: a L{zippath.ZipPath} or L{zippath.ZipArchive} instance. 506 """ 507 za = ZipArchive(self.importer.archive) 508 myPath = FilePath(self.importer.archive) 509 itsPath = FilePath(fsPathString) 510 if myPath == itsPath: 511 return za 512 # This is NOT a general-purpose rule for sys.path or __file__: 513 # zipimport specifically uses regular OS path syntax in its 514 # pathnames, even though zip files specify that slashes are always 515 # the separator, regardless of platform. 516 segs = itsPath.segmentsFrom(myPath) 517 zp = za 518 for seg in segs: 519 zp = zp.child(seg) 520 return zp 521 522 523registerAdapter(_ZipMapImpl, zipimport.zipimporter, IPathImportMapper) 524 525 526def _defaultSysPathFactory(): 527 """ 528 Provide the default behavior of PythonPath's sys.path factory, which is to 529 return the current value of sys.path. 530 531 @return: L{sys.path} 532 """ 533 return sys.path 534 535 536class PythonPath: 537 """ 538 I represent the very top of the Python object-space, the module list in 539 C{sys.path} and the modules list in C{sys.modules}. 540 541 @ivar _sysPath: A sequence of strings like C{sys.path}. This attribute is 542 read-only. 543 544 @ivar sysPath: The current value of the module search path list. 545 @type sysPath: C{list} 546 547 @ivar moduleDict: A dictionary mapping string module names to module 548 objects, like C{sys.modules}. 549 550 @ivar sysPathHooks: A list of PEP-302 path hooks, like C{sys.path_hooks}. 551 552 @ivar moduleLoader: A function that takes a fully-qualified python name and 553 returns a module, like L{twisted.python.reflect.namedAny}. 554 """ 555 556 def __init__( 557 self, 558 sysPath=None, 559 moduleDict=sys.modules, 560 sysPathHooks=sys.path_hooks, 561 importerCache=sys.path_importer_cache, 562 moduleLoader=namedAny, 563 sysPathFactory=None, 564 ): 565 """ 566 Create a PythonPath. You almost certainly want to use 567 modules.theSystemPath, or its aliased methods, rather than creating a 568 new instance yourself, though. 569 570 All parameters are optional, and if unspecified, will use 'system' 571 equivalents that makes this PythonPath like the global L{theSystemPath} 572 instance. 573 574 @param sysPath: a sys.path-like list to use for this PythonPath, to 575 specify where to load modules from. 576 577 @param moduleDict: a sys.modules-like dictionary to use for keeping 578 track of what modules this PythonPath has loaded. 579 580 @param sysPathHooks: sys.path_hooks-like list of PEP-302 path hooks to 581 be used for this PythonPath, to determie which importers should be 582 used. 583 584 @param importerCache: a sys.path_importer_cache-like list of PEP-302 585 importers. This will be used in conjunction with the given 586 sysPathHooks. 587 588 @param moduleLoader: a module loader function which takes a string and 589 returns a module. That is to say, it is like L{namedAny} - *not* like 590 L{__import__}. 591 592 @param sysPathFactory: a 0-argument callable which returns the current 593 value of a sys.path-like list of strings. Specify either this, or 594 sysPath, not both. This alternative interface is provided because the 595 way the Python import mechanism works, you can re-bind the 'sys.path' 596 name and that is what is used for current imports, so it must be a 597 factory rather than a value to deal with modification by rebinding 598 rather than modification by mutation. Note: it is not recommended to 599 rebind sys.path. Although this mechanism can deal with that, it is a 600 subtle point which some tools that it is easy for tools which interact 601 with sys.path to miss. 602 """ 603 if sysPath is not None: 604 sysPathFactory = lambda: sysPath 605 elif sysPathFactory is None: 606 sysPathFactory = _defaultSysPathFactory 607 self._sysPathFactory = sysPathFactory 608 self._sysPath = sysPath 609 self.moduleDict = moduleDict 610 self.sysPathHooks = sysPathHooks 611 self.importerCache = importerCache 612 self.moduleLoader = moduleLoader 613 614 @property 615 def sysPath(self): 616 """ 617 Retrieve the current value of the module search path list. 618 """ 619 return self._sysPathFactory() 620 621 def _findEntryPathString(self, modobj): 622 """ 623 Determine where a given Python module object came from by looking at path 624 entries. 625 """ 626 topPackageObj = modobj 627 while "." in topPackageObj.__name__: 628 topPackageObj = self.moduleDict[ 629 ".".join(topPackageObj.__name__.split(".")[:-1]) 630 ] 631 if _isPackagePath(FilePath(topPackageObj.__file__)): 632 # if package 'foo' is on sys.path at /a/b/foo, package 'foo's 633 # __file__ will be /a/b/foo/__init__.py, and we are looking for 634 # /a/b here, the path-entry; so go up two steps. 635 rval = dirname(dirname(topPackageObj.__file__)) 636 else: 637 # the module is completely top-level, not within any packages. The 638 # path entry it's on is just its dirname. 639 rval = dirname(topPackageObj.__file__) 640 641 # There are probably some awful tricks that an importer could pull 642 # which would break this, so let's just make sure... it's a loaded 643 # module after all, which means that its path MUST be in 644 # path_importer_cache according to PEP 302 -glyph 645 if rval not in self.importerCache: 646 warnings.warn( 647 "%s (for module %s) not in path importer cache " 648 "(PEP 302 violation - check your local configuration)." 649 % (rval, modobj.__name__), 650 stacklevel=3, 651 ) 652 653 return rval 654 655 def _smartPath(self, pathName): 656 """ 657 Given a path entry from sys.path which may refer to an importer, 658 return the appropriate FilePath-like instance. 659 660 @param pathName: a str describing the path. 661 662 @return: a FilePath-like object. 663 """ 664 importr = self.importerCache.get(pathName, _nothing) 665 if importr is _nothing: 666 for hook in self.sysPathHooks: 667 try: 668 importr = hook(pathName) 669 except ImportError: 670 pass 671 if importr is _nothing: # still 672 importr = None 673 return IPathImportMapper(importr, _theDefaultMapper).mapPath(pathName) 674 675 def iterEntries(self): 676 """ 677 Iterate the entries on my sysPath. 678 679 @return: a generator yielding PathEntry objects 680 """ 681 for pathName in self.sysPath: 682 fp = self._smartPath(pathName) 683 yield PathEntry(fp, self) 684 685 def __getitem__(self, modname): 686 """ 687 Get a python module by its given fully-qualified name. 688 689 @param modname: The fully-qualified Python module name to load. 690 691 @type modname: C{str} 692 693 @return: an object representing the module identified by C{modname} 694 695 @rtype: L{PythonModule} 696 697 @raise KeyError: if the module name is not a valid module name, or no 698 such module can be identified as loadable. 699 """ 700 # See if the module is already somewhere in Python-land. 701 moduleObject = self.moduleDict.get(modname) 702 if moduleObject is not None: 703 # we need 2 paths; one of the path entry and one for the module. 704 pe = PathEntry( 705 self._smartPath(self._findEntryPathString(moduleObject)), self 706 ) 707 mp = self._smartPath(moduleObject.__file__) 708 return PythonModule(modname, mp, pe) 709 710 # Recurse if we're trying to get a submodule. 711 if "." in modname: 712 pkg = self 713 for name in modname.split("."): 714 pkg = pkg[name] 715 return pkg 716 717 # Finally do the slowest possible thing and iterate 718 for module in self.iterModules(): 719 if module.name == modname: 720 return module 721 raise KeyError(modname) 722 723 def __contains__(self, module): 724 """ 725 Check to see whether or not a module exists on my import path. 726 727 @param module: The name of the module to look for on my import path. 728 @type module: C{str} 729 """ 730 try: 731 self.__getitem__(module) 732 return True 733 except KeyError: 734 return False 735 736 def __repr__(self) -> str: 737 """ 738 Display my sysPath and moduleDict in a string representation. 739 """ 740 return f"PythonPath({self.sysPath!r},{self.moduleDict!r})" 741 742 def iterModules(self): 743 """ 744 Yield all top-level modules on my sysPath. 745 """ 746 for entry in self.iterEntries(): 747 yield from entry.iterModules() 748 749 def walkModules(self, importPackages=False): 750 """ 751 Similar to L{iterModules}, this yields every module on the path, then every 752 submodule in each package or entry. 753 """ 754 for package in self.iterModules(): 755 yield from package.walkModules(importPackages=False) 756 757 758theSystemPath = PythonPath() 759 760 761def walkModules(importPackages=False): 762 """ 763 Deeply iterate all modules on the global python path. 764 765 @param importPackages: Import packages as they are seen. 766 """ 767 return theSystemPath.walkModules(importPackages=importPackages) 768 769 770def iterModules(): 771 """ 772 Iterate all modules and top-level packages on the global Python path, but 773 do not descend into packages. 774 """ 775 return theSystemPath.iterModules() 776 777 778def getModule(moduleName): 779 """ 780 Retrieve a module from the system path. 781 """ 782 return theSystemPath[moduleName] 783