1# -*- test-case-name: twisted.test.test_modules -*-
2# Copyright (c) Twisted Matrix Laboratories.
3# See LICENSE for details.
4
5"""
6This module aims to provide a unified, object-oriented view of Python's
7runtime hierarchy.
8
Python is a very dynamic language with a wide variety of introspection utilities.
10However, these utilities can be hard to use, because there is no consistent
11API.  The introspection API in python is made up of attributes (__name__,
12__module__, func_name, etc) on instances, modules, classes and functions which
13vary between those four types, utility modules such as 'inspect' which provide
14some functionality, the 'imp' module, the "compiler" module, the semantics of
15PEP 302 support, and setuptools, among other things.
16
17At the top, you have "PythonPath", an abstract representation of sys.path which
18includes methods to locate top-level modules, with or without loading them.
19The top-level exposed functions in this module for accessing the system path
20are "walkModules", "iterModules", and "getModule".
21
From least to most specific, here are the objects provided::
23
24                  PythonPath  # sys.path
25                      |
26                      v
27                  PathEntry   # one entry on sys.path: an importer
28                      |
29                      v
30                 PythonModule # a module or package that can be loaded
31                      |
32                      v
33                 PythonAttribute # an attribute of a module (function or class)
34                      |
35                      v
36                 PythonAttribute # an attribute of a function or class
37                      |
38                      v
39                     ...
40
41Here's an example of idiomatic usage: this is what you would do to list all of
42the modules outside the standard library's python-files directory::
43
44    import os
45    stdlibdir = os.path.dirname(os.__file__)
46
47    from twisted.python.modules import iterModules
48
49    for modinfo in iterModules():
50        if (modinfo.pathEntry.filePath.path != stdlibdir
51            and not modinfo.isPackage()):
52            print('unpackaged: %s: %s' % (
53                modinfo.name, modinfo.filePath.path))
54
55@var theSystemPath: The very top of the Python object space.
56@type theSystemPath: L{PythonPath}
57"""
58
59
60import inspect
61import sys
62import warnings
63import zipimport
64
65# let's try to keep path imports to a minimum...
66from os.path import dirname, split as splitpath
67from typing import cast
68
69from zope.interface import Interface, implementer
70
71from twisted.python.compat import nativeString
72from twisted.python.components import registerAdapter
73from twisted.python.filepath import FilePath, UnlistableError
74from twisted.python.reflect import namedAny
75from twisted.python.zippath import ZipArchive
76
# Unique sentinel meaning "no value supplied", so that None stays usable as a
# real value wherever this is the default.
_nothing = object()

# This module has a docstring, so a missing __doc__ presumably means the
# interpreter is running with docstrings stripped.
OPTIMIZED_MODE = __doc__ is None

# Extensions treated as Python modules: source files always, plus the
# compiled flavour that matches the current mode.
PYTHON_EXTENSIONS = [".py", ".pyo" if OPTIMIZED_MODE else ".pyc"]
85
86
def _isPythonIdentifier(string):
    """
    Cheap stand-in for a real identifier check: a name is rejected only if it
    contains a space, a dot or a dash.

    @param string: a L{str} which might or might not be a valid python
        identifier.
    @return: C{True} if none of the rejected characters is present, C{False}
        otherwise.
    """
    candidate = nativeString(string)
    return not any(forbidden in candidate for forbidden in (" ", ".", "-"))
97
98
def _isPackagePath(fpath):
    """
    Determine if a FilePath-like object is a Python package, i.e. whether it
    points at an C{__init__} file.

    @param fpath: a FilePath-like object; only its C{splitext()} method is
        used.
    @return: C{True} if the last path segment, with its extension removed, is
        C{"__init__"}.
    """
    # TODO: deal with __init__module.(so|dll|pyd)?
    withoutExtension = fpath.splitext()[0]
    _, lastSegment = splitpath(withoutExtension)
    return lastSegment == "__init__"
105
106
class _ModuleIteratorHelper:
    """
    This mixin provides common behavior between python module and path entries,
    since the mechanism for searching sys.path and __path__ attributes is
    remarkably similar.

    Classes using this mixin are expected to provide a C{filePath} attribute
    and to implement L{_packagePaths} and L{_getEntry}.
    """

    def iterModules(self):
        """
        Loop over the modules present below this entry or package on PYTHONPATH.

        For modules which are not packages, this will yield nothing.

        For packages and path entries, this will only yield modules one level
        down; i.e. if there is a package a.b.c, iterModules on a will only
        return a.b.  If you want to descend deeply, use walkModules.

        Each module name is yielded at most once; the first location that
        provides it wins.

        @return: a generator which yields PythonModule instances that describe
        modules which can be, or have been, imported.
        """
        if not self.filePath.exists():
            return

        # Names of the modules produced so far.
        yielded = set()

        for placeToLook in self._packagePaths():
            try:
                children = sorted(placeToLook.children())
            except UnlistableError:
                continue

            for potentialTopLevel in children:
                fullBasename = potentialTopLevel.basename()
                ext = potentialTopLevel.splitext()[1]
                # Only strip an extension when there is one: slicing with
                # [:-len(ext)] is [:0] for an empty extension, which would
                # leave an empty name and let every extension-less directory
                # (e.g. "foo-bar") pass the identifier check below.
                if ext:
                    potentialBasename = fullBasename[: -len(ext)]
                else:
                    potentialBasename = fullBasename

                if ext in PYTHON_EXTENSIONS:
                    # TODO: this should be a little choosier about which path entry
                    # it selects first, and it should do all the .so checking and
                    # crud
                    if not _isPythonIdentifier(potentialBasename):
                        continue
                    modname = self._subModuleName(potentialBasename)
                    if modname.split(".")[-1] == "__init__":
                        # This marks the directory as a package so it can't be
                        # a module.
                        continue
                    if modname not in yielded:
                        yielded.add(modname)
                        pm = PythonModule(modname, potentialTopLevel, self._getEntry())
                        assert pm != self
                        yield pm
                else:
                    # Anything else can only be a package: an extension-less,
                    # identifier-named directory holding an __init__ file.
                    if (
                        ext
                        or not _isPythonIdentifier(potentialBasename)
                        or not potentialTopLevel.isdir()
                    ):
                        continue
                    modname = self._subModuleName(fullBasename)
                    if modname in yielded:
                        continue
                    for initExt in PYTHON_EXTENSIONS:
                        initpy = potentialTopLevel.child("__init__" + initExt)
                        if initpy.exists():
                            yielded.add(modname)
                            pm = PythonModule(modname, initpy, self._getEntry())
                            assert pm != self
                            yield pm
                            break

    def walkModules(self, importPackages=False):
        """
        Similar to L{iterModules}, this yields self, and then every module in my
        package or entry, and every submodule in each package or entry.

        In other words, this is deep, and L{iterModules} is shallow.

        @param importPackages: passed along unchanged to the C{walkModules} of
        every module found.
        """
        yield self
        for package in self.iterModules():
            yield from package.walkModules(importPackages=importPackages)

    def _subModuleName(self, mn):
        """
        This is a hook to provide packages with the ability to specify their names
        as a prefix to submodules here.

        @param mn: the unqualified name of a module found below this object.

        @return: the name to give that module; by default C{mn} unchanged.
        """
        return mn

    def _packagePaths(self):
        """
        Implement in subclasses to specify where to look for modules.

        @return: iterable of FilePath-like objects.
        """
        raise NotImplementedError()

    def _getEntry(self):
        """
        Implement in subclasses to specify what path entry submodules will come
        from.

        @return: a PathEntry instance.
        """
        raise NotImplementedError()

    def __getitem__(self, modname):
        """
        Retrieve a module from below this path or package.

        @param modname: a str naming a module to be loaded.  For entries, this
        is a top-level, undotted package name, and for packages it is the name
        of the module without the package prefix.  For example, if you have a
        PythonModule representing the 'twisted' package, you could use::

            twistedPackageObj['python']['modules']

        to retrieve this module.

        @raise KeyError: if the module is not found.

        @return: a PythonModule.
        """
        for module in self.iterModules():
            if module.name == self._subModuleName(modname):
                return module
        raise KeyError(modname)

    def __iter__(self):
        """
        Implemented to raise NotImplementedError for clarity, so that attempting to
        loop over this object won't call __getitem__.

        Note: in the future there might be some sensible default for iteration,
        like 'walkEverything', so this is deliberately untested and undefined
        behavior.
        """
        raise NotImplementedError()
240
241
class PythonAttribute:
    """
    I represent a function, class, or other object that is present.

    @ivar name: the fully qualified python name of the attribute represented by
    this class.

    @ivar onObject: a reference to a PythonModule or other PythonAttribute that
    is this attribute's logical parent.

    @ivar pythonValue: the value of the attribute this object points to.
    """

    def __init__(self, name, onObject, loaded, pythonValue):
        """
        Create a PythonAttribute.  This is a private constructor.  Do not construct
        me directly, use PythonModule.iterAttributes.

        @param name: the FQPN
        @param onObject: see ivar
        @param loaded: always True, for now
        @param pythonValue: the value of the attribute we're pointing to.
        """
        self.name = name
        self.onObject = onObject
        self._loaded = loaded
        self.pythonValue = pythonValue

    def __repr__(self) -> str:
        """
        Return a string representation including the attribute name.
        """
        return "PythonAttribute<%r>" % (self.name,)

    def isLoaded(self):
        """
        Return a boolean describing whether the attribute this describes has
        actually been loaded into memory by importing its module.

        Note: this currently always returns true; there is no Python parser
        support in this module yet.
        """
        return self._loaded

    def load(self, default=_nothing):
        """
        Load the value associated with this attribute.

        @param default: currently ignored; the stored value is returned
        directly.

        @return: the arbitrary Python object stored as C{pythonValue}.
        """
        return self.pythonValue

    def iterAttributes(self):
        """
        List the attributes of the value this attribute points to.

        @return: a generator yielding one PythonAttribute, parented on this
        object, for every member reported by L{inspect.getmembers}.
        """
        for memberName, memberValue in inspect.getmembers(self.load()):
            qualifiedName = ".".join((self.name, memberName))
            yield PythonAttribute(qualifiedName, self, True, memberValue)
295
296
class PythonModule(_ModuleIteratorHelper):
    """
    Representation of a module which could be imported from sys.path.

    @ivar name: the fully qualified python name of this module.

    @ivar filePath: a FilePath-like object which points to the location of this
    module.

    @ivar parentPath: the parent of C{filePath}.

    @ivar pathEntry: a L{PathEntry} instance which this module was located
    from.
    """

    def __init__(self, name, filePath, pathEntry):
        """
        Create a PythonModule.  Do not construct this directly, instead inspect a
        PythonPath or other PythonModule instances.

        @param name: see ivar
        @param filePath: see ivar
        @param pathEntry: see ivar
        """
        _name = nativeString(name)
        # A package is named after its directory, never after its __init__.
        assert not _name.endswith(".__init__")
        self.name = _name
        self.filePath = filePath
        self.parentPath = filePath.parent()
        self.pathEntry = pathEntry

    def _getEntry(self):
        """
        Submodules come from the same path entry as this module.

        @return: this module's L{PathEntry}.
        """
        return self.pathEntry

    def __repr__(self) -> str:
        """
        Return a string representation including the module name.
        """
        return f"PythonModule<{self.name!r}>"

    def isLoaded(self):
        """
        Determine if the module is loaded into sys.modules.

        @return: a boolean: true if loaded, false if not.
        """
        return self.pathEntry.pythonPath.moduleDict.get(self.name) is not None

    def iterAttributes(self):
        """
        List all the attributes defined in this module.

        Note: Future work is planned here to make it possible to list python
        attributes on a module without loading the module by inspecting ASTs or
        bytecode, but currently any iteration of PythonModule objects insists
        they must be loaded, and will use inspect.getmodule.

        @raise NotImplementedError: if this module is not loaded.

        @return: a generator yielding PythonAttribute instances describing the
        attributes of this module.
        """
        if not self.isLoaded():
            raise NotImplementedError(
                "You can't load attributes from non-loaded modules yet."
            )
        for name, val in inspect.getmembers(self.load()):
            yield PythonAttribute(self.name + "." + name, self, True, val)

    def isPackage(self):
        """
        Returns true if this module is also a package, and might yield something
        from iterModules.
        """
        return _isPackagePath(self.filePath)

    def load(self, default=_nothing):
        """
        Load this module.

        @param default: if specified, the value to return in case of an error.

        @return: a genuine python module.

        @raise Exception: Importing modules is a risky business;
        the errors of any code run at module scope may be raised from here, as
        well as ImportError if something bizarre happened to the system path
        between the discovery of this PythonModule object and the attempt to
        import it.  If you specify a default, the error will be swallowed
        entirely, and not logged.

        @rtype: types.ModuleType.
        """
        try:
            return self.pathEntry.pythonPath.moduleLoader(self.name)
        except BaseException:  # this needs more thought...
            if default is not _nothing:
                return default
            raise

    def __eq__(self, other: object) -> bool:
        """
        PythonModules with the same name are equal.
        """
        if isinstance(other, PythonModule):
            return cast(bool, other.name == self.name)
        return NotImplemented

    def __hash__(self) -> int:
        """
        Hash on the module name, consistently with L{__eq__}.

        Defining C{__eq__} alone would leave instances unhashable; this allows
        them to be stored in sets and used as dictionary keys.
        """
        return hash(self.name)

    def walkModules(self, importPackages=False):
        """
        Yield this module, then every module below it.

        @param importPackages: if true and this module is a package, it is
        loaded as soon as this method is called, before any iteration.

        @return: the generator produced by the inherited C{walkModules}.
        """
        if importPackages and self.isPackage():
            self.load()
        return super().walkModules(importPackages=importPackages)

    def _subModuleName(self, mn):
        """
        submodules of this module are prefixed with our name.
        """
        return self.name + "." + mn

    def _packagePaths(self):
        """
        Yield a sequence of FilePath-like objects which represent path segments.

        Modules which are not packages yield nothing.  A package which is not
        loaded yields only its own directory; a loaded package yields the
        existing entries of its C{__path__}, if it has one.
        """
        if not self.isPackage():
            return
        if not self.isLoaded():
            yield self.parentPath
            return

        loadedModule = self.load()
        for fn in getattr(loadedModule, "__path__", ()):
            if fn == self.parentPath.path:
                # this should _really_ exist.
                assert self.parentPath.exists()
                yield self.parentPath
            else:
                smp = self.pathEntry.pythonPath._smartPath(fn)
                if smp.exists():
                    yield smp
434
435
class PathEntry(_ModuleIteratorHelper):
    """
    I am a proxy for a single entry on sys.path.

    @ivar filePath: a FilePath-like object pointing at the filesystem location
    or archive file where this path entry is stored.

    @ivar pythonPath: a PythonPath instance.
    """

    def __init__(self, filePath, pythonPath):
        """
        Create a PathEntry.  This is a private constructor.

        @param filePath: see ivar
        @param pythonPath: see ivar
        """
        self.pythonPath = pythonPath
        self.filePath = filePath

    def _getEntry(self):
        """
        Modules found here belong to this very entry.

        @return: C{self}.
        """
        return self

    def __repr__(self) -> str:
        """
        Return a string representation including my file path.
        """
        return "PathEntry<%r>" % (self.filePath,)

    def _packagePaths(self):
        """
        The only place to look for modules is my own file path.

        @return: a generator yielding C{self.filePath} once.
        """
        yield self.filePath
461
462
class IPathImportMapper(Interface):
    """
    Internal interface which maps importers to factories for FilePath-like
    objects.
    """

    def mapPath(pathLikeString):
        """
        Build a FilePath-like object for the given path.

        @param pathLikeString: a path-like string, like one that might be
        passed to an import hook.

        @return: a L{FilePath}, or something like it (currently only a
        L{ZipPath}, but more might be added later).
        """
479
480
@implementer(IPathImportMapper)
class _DefaultMapImpl:
    """
    Wrapper for the default importer, i.e. None.
    """

    def mapPath(self, fsPathString):
        """
        Wrap a filesystem path string in a L{FilePath}.

        @param fsPathString: the path to wrap.

        @return: a L{FilePath} for C{fsPathString}.
        """
        mapped = FilePath(fsPathString)
        return mapped


# Shared instance, used as the fallback mapper.
_theDefaultMapper = _DefaultMapImpl()
490
491
@implementer(IPathImportMapper)
class _ZipMapImpl:
    """
    IPathImportMapper implementation for zipimport.ZipImporter.
    """

    def __init__(self, importer):
        """
        @param importer: the importer being adapted; its C{archive} attribute
        is read by L{mapPath}.
        """
        self.importer = importer

    def mapPath(self, fsPathString):
        """
        Map the given FS path to a ZipPath, by looking at the ZipImporter's
        "archive" attribute and using it as our ZipArchive root, then walking
        down into the archive from there.

        @param fsPathString: a path naming the archive itself or something
        below it.

        @return: a L{zippath.ZipPath} or L{zippath.ZipArchive} instance.
        """
        archiveName = self.importer.archive
        current = ZipArchive(archiveName)
        archiveRoot = FilePath(archiveName)
        requested = FilePath(fsPathString)
        if not (archiveRoot == requested):
            # This is NOT a general-purpose rule for sys.path or __file__:
            # zipimport specifically uses regular OS path syntax in its
            # pathnames, even though zip files specify that slashes are always
            # the separator, regardless of platform.
            for segment in requested.segmentsFrom(archiveRoot):
                current = current.child(segment)
        return current


# Make zipimporter instances adaptable to IPathImportMapper.
registerAdapter(_ZipMapImpl, zipimport.zipimporter, IPathImportMapper)
524
525
def _defaultSysPathFactory():
    """
    Default sys.path factory for PythonPath: look up whatever C{sys.path} is
    bound to at the moment of the call.

    @return: L{sys.path}
    """
    currentPath = sys.path
    return currentPath
534
535
class PythonPath:
    """
    I represent the very top of the Python object-space, the module list in
    C{sys.path} and the modules list in C{sys.modules}.

    @ivar _sysPath: A sequence of strings like C{sys.path}.  This attribute is
    read-only.

    @ivar sysPath: The current value of the module search path list.
    @type sysPath: C{list}

    @ivar moduleDict: A dictionary mapping string module names to module
    objects, like C{sys.modules}.

    @ivar sysPathHooks: A list of PEP-302 path hooks, like C{sys.path_hooks}.

    @ivar importerCache: A dictionary mapping path strings to importers, like
    C{sys.path_importer_cache}.

    @ivar moduleLoader: A function that takes a fully-qualified python name and
    returns a module, like L{twisted.python.reflect.namedAny}.
    """

    def __init__(
        self,
        sysPath=None,
        moduleDict=sys.modules,
        sysPathHooks=sys.path_hooks,
        importerCache=sys.path_importer_cache,
        moduleLoader=namedAny,
        sysPathFactory=None,
    ):
        """
        Create a PythonPath.  You almost certainly want to use
        modules.theSystemPath, or its aliased methods, rather than creating a
        new instance yourself, though.

        All parameters are optional, and if unspecified, will use 'system'
        equivalents that makes this PythonPath like the global L{theSystemPath}
        instance.

        @param sysPath: a sys.path-like list to use for this PythonPath, to
        specify where to load modules from.

        @param moduleDict: a sys.modules-like dictionary to use for keeping
        track of what modules this PythonPath has loaded.

        @param sysPathHooks: sys.path_hooks-like list of PEP-302 path hooks to
        be used for this PythonPath, to determine which importers should be
        used.

        @param importerCache: a sys.path_importer_cache-like list of PEP-302
        importers.  This will be used in conjunction with the given
        sysPathHooks.

        @param moduleLoader: a module loader function which takes a string and
        returns a module.  That is to say, it is like L{namedAny} - *not* like
        L{__import__}.

        @param sysPathFactory: a 0-argument callable which returns the current
        value of a sys.path-like list of strings.  Specify either this, or
        sysPath, not both; if both are given, sysPath wins.  This alternative
        interface is provided because the way the Python import mechanism
        works, you can re-bind the 'sys.path' name and that is what is used
        for current imports, so it must be a factory rather than a value to
        deal with modification by rebinding rather than modification by
        mutation.  Note: it is not recommended to rebind sys.path.  Although
        this mechanism can deal with that, it is a subtle point which is easy
        for tools which interact with sys.path to miss.
        """
        if sysPath is not None:
            sysPathFactory = lambda: sysPath
        elif sysPathFactory is None:
            sysPathFactory = _defaultSysPathFactory
        self._sysPathFactory = sysPathFactory
        self._sysPath = sysPath
        self.moduleDict = moduleDict
        self.sysPathHooks = sysPathHooks
        self.importerCache = importerCache
        self.moduleLoader = moduleLoader

    @property
    def sysPath(self):
        """
        Retrieve the current value of the module search path list.
        """
        return self._sysPathFactory()

    def _findEntryPathString(self, modobj):
        """
        Determine where a given Python module object came from by looking at path
        entries.

        @param modobj: a loaded module object; it and all of its parent
        packages must be present in C{moduleDict}.

        @return: the path string of the entry the module's top-level package
        or module lives on.
        """
        # Climb from the module up to its top-level package.
        topPackageObj = modobj
        while "." in topPackageObj.__name__:
            parentName = topPackageObj.__name__.rpartition(".")[0]
            topPackageObj = self.moduleDict[parentName]

        topFile = topPackageObj.__file__
        if _isPackagePath(FilePath(topFile)):
            # if package 'foo' is on sys.path at /a/b/foo, package 'foo's
            # __file__ will be /a/b/foo/__init__.py, and we are looking for
            # /a/b here, the path-entry; so go up two steps.
            rval = dirname(dirname(topFile))
        else:
            # the module is completely top-level, not within any packages.  The
            # path entry it's on is just its dirname.
            rval = dirname(topFile)

        # There are probably some awful tricks that an importer could pull
        # which would break this, so let's just make sure... it's a loaded
        # module after all, which means that its path MUST be in
        # path_importer_cache according to PEP 302 -glyph
        if rval not in self.importerCache:
            warnings.warn(
                "%s (for module %s) not in path importer cache "
                "(PEP 302 violation - check your local configuration)."
                % (rval, modobj.__name__),
                stacklevel=3,
            )

        return rval

    def _smartPath(self, pathName):
        """
        Given a path entry from sys.path which may refer to an importer,
        return the appropriate FilePath-like instance.

        The importer is taken from C{importerCache} when present; otherwise
        the path hooks are tried in order and the first one which accepts the
        path is used.

        @param pathName: a str describing the path.

        @return: a FilePath-like object.
        """
        importr = self.importerCache.get(pathName, _nothing)
        if importr is _nothing:
            for hook in self.sysPathHooks:
                try:
                    importr = hook(pathName)
                except ImportError:
                    continue
                else:
                    # The first hook that accepts the path wins, as with
                    # sys.path_hooks; do not let a later hook override it.
                    break
            else:
                # No hook accepted the path: fall back to the default mapper.
                importr = None
        return IPathImportMapper(importr, _theDefaultMapper).mapPath(pathName)

    def iterEntries(self):
        """
        Iterate the entries on my sysPath.

        @return: a generator yielding PathEntry objects
        """
        for pathName in self.sysPath:
            fp = self._smartPath(pathName)
            yield PathEntry(fp, self)

    def __getitem__(self, modname):
        """
        Get a python module by its given fully-qualified name.

        @param modname: The fully-qualified Python module name to load.

        @type modname: C{str}

        @return: an object representing the module identified by C{modname}

        @rtype: L{PythonModule}

        @raise KeyError: if the module name is not a valid module name, or no
            such module can be identified as loadable.
        """
        # See if the module is already somewhere in Python-land.
        moduleObject = self.moduleDict.get(modname)
        if moduleObject is not None:
            # we need 2 paths; one of the path entry and one for the module.
            pe = PathEntry(
                self._smartPath(self._findEntryPathString(moduleObject)), self
            )
            mp = self._smartPath(moduleObject.__file__)
            return PythonModule(modname, mp, pe)

        # Recurse if we're trying to get a submodule.
        if "." in modname:
            pkg = self
            for name in modname.split("."):
                pkg = pkg[name]
            return pkg

        # Finally do the slowest possible thing and iterate
        for module in self.iterModules():
            if module.name == modname:
                return module
        raise KeyError(modname)

    def __contains__(self, module):
        """
        Check to see whether or not a module exists on my import path.

        @param module: The name of the module to look for on my import path.
        @type module: C{str}
        """
        try:
            self.__getitem__(module)
        except KeyError:
            return False
        return True

    def __repr__(self) -> str:
        """
        Display my sysPath and moduleDict in a string representation.
        """
        return f"PythonPath({self.sysPath!r},{self.moduleDict!r})"

    def iterModules(self):
        """
        Yield all top-level modules on my sysPath.
        """
        for entry in self.iterEntries():
            yield from entry.iterModules()

    def walkModules(self, importPackages=False):
        """
        Similar to L{iterModules}, this yields every module on the path, then every
        submodule in each package or entry.

        @param importPackages: passed along to the C{walkModules} of every
        top-level module found.
        """
        for package in self.iterModules():
            yield from package.walkModules(importPackages=importPackages)


# The PythonPath built with every default, i.e. backed by the real sys.path,
# sys.modules, sys.path_hooks and sys.path_importer_cache.
theSystemPath = PythonPath()
759
760
def walkModules(importPackages=False):
    """
    Deeply iterate all modules on the global python path.

    @param importPackages: Import packages as they are seen.  This is passed
    along to C{theSystemPath.walkModules}.

    @return: whatever C{theSystemPath.walkModules} returns.
    """
    systemPath = theSystemPath
    return systemPath.walkModules(importPackages=importPackages)
768
769
def iterModules():
    """
    Iterate all modules and top-level packages on the global Python path, but
    do not descend into packages.

    @return: whatever C{theSystemPath.iterModules} returns.
    """
    systemPath = theSystemPath
    return systemPath.iterModules()
776
777
def getModule(moduleName):
    """
    Retrieve a module from the system path.

    @param moduleName: the name to look up by indexing C{theSystemPath}.

    @return: the result of C{theSystemPath[moduleName]}.
    """
    systemPath = theSystemPath
    return systemPath[moduleName]
783