1"""Utilities to support packages."""
2
3from collections import namedtuple
4from functools import singledispatch as simplegeneric
5import importlib
6import importlib.util
7import importlib.machinery
8import os
9import os.path
10import sys
11from types import ModuleType
12import warnings
13
14__all__ = [
15    'get_importer', 'iter_importers', 'get_loader', 'find_loader',
16    'walk_packages', 'iter_modules', 'get_data',
17    'ImpImporter', 'ImpLoader', 'read_code', 'extend_path',
18    'ModuleInfo',
19]
20
21
# Record type yielded by iter_modules() and walk_packages(): the finder that
# produced the entry, the (prefixed) module name, and whether it is a package.
ModuleInfo = namedtuple('ModuleInfo', 'module_finder name ispkg')
ModuleInfo.__doc__ = 'A namedtuple with minimal info about a module.'
24
25
26def _get_spec(finder, name):
27    """Return the finder-specific module spec."""
28    # Works with legacy finders.
29    try:
30        find_spec = finder.find_spec
31    except AttributeError:
32        loader = finder.find_module(name)
33        if loader is None:
34            return None
35        return importlib.util.spec_from_loader(name, loader)
36    else:
37        return find_spec(name)
38
39
def read_code(stream):
    """Load a code object from an open compiled-bytecode *stream*.

    The first 4 bytes must equal ``importlib.util.MAGIC_NUMBER``; if they
    do not, None is returned.  The following 12 header bytes are skipped
    and the remainder is unmarshalled and returned.
    """
    # This helper lets the PEP 302 emulation handle compiled files correctly.
    import marshal

    if stream.read(4) != importlib.util.MAGIC_NUMBER:
        return None

    # Discard the rest of the header; only the payload is of interest.
    stream.read(12)
    return marshal.load(stream)
51
52
def walk_packages(path=None, prefix='', onerror=None):
    """Recursively yield a ModuleInfo for every module reachable from path.

    'path' is either None (meaning: everything iter_modules() finds by
    default) or a list of paths to search for modules.

    'prefix' is a string placed in front of every module name that is
    produced.

    Each *package* that is found (but not each plain module) gets imported,
    because its __path__ attribute is needed to locate its submodules.

    'onerror' is a callable taking one argument, the name of the package
    whose import failed.  When it is supplied it is called for any
    exception raised by the import.  When it is omitted, ImportError is
    swallowed silently and every other exception propagates, ending the
    walk.

    Examples:

    # list all modules python can access
    walk_packages()

    # list all submodules of ctypes
    walk_packages(ctypes.__path__, ctypes.__name__+'.')
    """
    visited = set()

    def seen(entry):
        # True if this path entry was met before; otherwise remember it.
        if entry in visited:
            return True
        visited.add(entry)
        return False

    for info in iter_modules(path, prefix):
        yield info

        if not info.ispkg:
            continue

        try:
            __import__(info.name)
        except ImportError:
            if onerror is not None:
                onerror(info.name)
        except Exception:
            if onerror is None:
                raise
            onerror(info.name)
        else:
            subpath = getattr(sys.modules[info.name], '__path__', None) or []

            # skip path items that were already traversed
            subpath = [item for item in subpath if not seen(item)]

            yield from walk_packages(subpath, info.name+'.', onerror)
108
109
def iter_modules(path=None, prefix=''):
    """Yield a ModuleInfo for each submodule found on path.

    'path' is either None, in which case the finders produced by
    iter_importers() are consulted, or a list of paths to search.  Passing
    a plain string raises ValueError.

    'prefix' is a string placed in front of every module name that is
    produced.

    A given name is reported only once, for the first finder that lists it.
    """
    if isinstance(path, str):
        raise ValueError("path must be None or list of paths to look for "
                        "modules in")

    if path is None:
        importers = iter_importers()
    else:
        importers = map(get_importer, path)

    emitted = set()
    for importer in importers:
        for name, ispkg in iter_importer_modules(importer, prefix):
            if name in emitted:
                continue
            emitted.add(name)
            yield ModuleInfo(importer, name, ispkg)
134
135
@simplegeneric
def iter_importer_modules(importer, prefix=''):
    """Default module lister: defer to the importer's own iter_modules().

    Importers without an ``iter_modules`` attribute contribute nothing (an
    empty list is returned).  Type-specific listers can be attached through
    ``iter_importer_modules.register``.
    """
    if hasattr(importer, 'iter_modules'):
        return importer.iter_modules(prefix)
    return []
141
142
143# Implement a file walker for the normal importlib path hook
144def _iter_file_finder_modules(importer, prefix=''):
145    if importer.path is None or not os.path.isdir(importer.path):
146        return
147
148    yielded = {}
149    import inspect
150    try:
151        filenames = os.listdir(importer.path)
152    except OSError:
153        # ignore unreadable directories like import does
154        filenames = []
155    filenames.sort()  # handle packages before same-named modules
156
157    for fn in filenames:
158        modname = inspect.getmodulename(fn)
159        if modname=='__init__' or modname in yielded:
160            continue
161
162        path = os.path.join(importer.path, fn)
163        ispkg = False
164
165        if not modname and os.path.isdir(path) and '.' not in fn:
166            modname = fn
167            try:
168                dircontents = os.listdir(path)
169            except OSError:
170                # ignore unreadable directories like import does
171                dircontents = []
172            for fn in dircontents:
173                subname = inspect.getmodulename(fn)
174                if subname=='__init__':
175                    ispkg = True
176                    break
177            else:
178                continue    # not a package
179
180        if modname and '.' not in modname:
181            yielded[modname] = 1
182            yield prefix + modname, ispkg
183
# Dispatch FileFinder instances to _iter_file_finder_modules.
iter_importer_modules.register(
    importlib.machinery.FileFinder, _iter_file_finder_modules)
186
187
188def _import_imp():
189    global imp
190    with warnings.catch_warnings():
191        warnings.simplefilter('ignore', DeprecationWarning)
192        imp = importlib.import_module('imp')
193
class ImpImporter:
    """PEP 302 Finder that wraps Python's "classic" import algorithm

    ImpImporter(dirname) produces a PEP 302 finder that searches that
    directory.  ImpImporter(None) produces a PEP 302 finder that searches
    the current sys.path, plus any modules that are frozen or built-in.

    Note that ImpImporter does not currently support being used by placement
    on sys.meta_path.
    """

    def __init__(self, path=None):
        """Emit a DeprecationWarning, load 'imp' and remember *path*."""
        warnings.warn("This emulation is deprecated and slated for removal "
                      "in Python 3.12; use 'importlib' instead",
                      DeprecationWarning)
        _import_imp()
        self.path = path

    def find_module(self, fullname, path=None):
        """Return an ImpLoader for *fullname*, or None if it is not found.

        The 'path' argument is ignored; it is only used via meta_path.
        Dotted names cannot be resolved when self.path is None.
        """
        subname = fullname.rpartition(".")[2]
        if self.path is None:
            if subname != fullname:
                return None
            search = None
        else:
            search = [os.path.realpath(self.path)]

        try:
            file, filename, etc = imp.find_module(subname, search)
        except ImportError:
            return None
        return ImpLoader(fullname, file, filename, etc)

    def iter_modules(self, prefix=''):
        """Yield ``(prefix + name, ispkg)`` for modules in self.path.

        Nothing is yielded when self.path is None or is not a directory.
        A sub-directory counts as a package when it directly contains an
        ``__init__`` module.  Each module name is reported once.
        """
        base = self.path
        if base is None or not os.path.isdir(base):
            return

        import inspect
        try:
            entries = os.listdir(base)
        except OSError:
            # unreadable directories are ignored, like import does
            entries = []
        entries.sort()  # so a package is handled before a same-named module

        emitted = set()
        for entry in entries:
            modname = inspect.getmodulename(entry)
            if modname == '__init__' or modname in emitted:
                continue

            ispkg = False
            full = os.path.join(base, entry)

            if not modname and os.path.isdir(full) and '.' not in entry:
                try:
                    children = os.listdir(full)
                except OSError:
                    # unreadable directories are ignored, like import does
                    children = []
                ispkg = any(inspect.getmodulename(child) == '__init__'
                            for child in children)
                if not ispkg:
                    continue    # a plain directory, not a package
                modname = entry

            if modname and '.' not in modname:
                emitted.add(modname)
                yield prefix + modname, ispkg
267
268
class ImpLoader:
    """PEP 302 Loader that wraps Python's "classic" import algorithm
    """
    # Class-level defaults; get_code()/get_source() cache their results
    # on the instance under these names.
    code = source = None

    def __init__(self, fullname, file, filename, etc):
        """Emit a DeprecationWarning, load 'imp' and store the arguments."""
        warnings.warn("This emulation is deprecated and slated for removal in "
                      "Python 3.12; use 'importlib' instead",
                      DeprecationWarning)
        _import_imp()
        self.fullname = fullname
        self.file = file
        self.filename = filename
        self.etc = etc

    def load_module(self, fullname):
        """Load and return the module through imp.load_module()."""
        self._reopen()
        try:
            module = imp.load_module(fullname, self.file, self.filename,
                                     self.etc)
        finally:
            if self.file:
                self.file.close()
        # __loader__ is deliberately not set: the module should look normal,
        # since this is only a wrapper for the standard import machinery.
        return module

    def get_data(self, pathname):
        """Return the bytes stored in the file at *pathname*."""
        with open(pathname, "rb") as stream:
            return stream.read()

    def _reopen(self):
        """Reopen self.file if it exists but has been closed."""
        if not (self.file and self.file.closed):
            return
        kind = self.etc[2]
        if kind == imp.PY_SOURCE:
            self.file = open(self.filename, 'r')
        elif kind in (imp.PY_COMPILED, imp.C_EXTENSION):
            self.file = open(self.filename, 'rb')

    def _fix_name(self, fullname):
        """Default *fullname* to this loader's module; reject any other."""
        if fullname is None:
            return self.fullname
        if fullname != self.fullname:
            raise ImportError("Loader for module %s cannot handle "
                              "module %s" % (self.fullname, fullname))
        return fullname

    def is_package(self, fullname):
        """Return True if the loader's module is a package directory."""
        self._fix_name(fullname)
        return self.etc[2] == imp.PKG_DIRECTORY

    def get_code(self, fullname=None):
        """Return the module's code object, computing it on first use."""
        fullname = self._fix_name(fullname)
        if self.code is not None:
            return self.code

        kind = self.etc[2]
        if kind == imp.PY_SOURCE:
            text = self.get_source(fullname)
            self.code = compile(text, self.filename, 'exec')
        elif kind == imp.PY_COMPILED:
            self._reopen()
            try:
                self.code = read_code(self.file)
            finally:
                self.file.close()
        elif kind == imp.PKG_DIRECTORY:
            self.code = self._get_delegate().get_code()
        return self.code

    def get_source(self, fullname=None):
        """Return the module's source text, reading it on first use."""
        fullname = self._fix_name(fullname)
        if self.source is not None:
            return self.source

        kind = self.etc[2]
        if kind == imp.PY_SOURCE:
            self._reopen()
            try:
                self.source = self.file.read()
            finally:
                self.file.close()
        elif kind == imp.PY_COMPILED:
            # Look for a source file named like the compiled one minus
            # its final character.
            source_path = self.filename[:-1]
            if os.path.exists(source_path):
                with open(source_path, 'r') as stream:
                    self.source = stream.read()
        elif kind == imp.PKG_DIRECTORY:
            self.source = self._get_delegate().get_source()
        return self.source

    def _get_delegate(self):
        """Return the loader for '__init__' inside self.filename."""
        package_finder = ImpImporter(self.filename)
        return _get_spec(package_finder, '__init__').loader

    def get_filename(self, fullname=None):
        """Return the file name for the module, or None if there is none."""
        fullname = self._fix_name(fullname)
        kind = self.etc[2]
        if kind == imp.PKG_DIRECTORY:
            return self._get_delegate().get_filename()
        if kind in (imp.PY_SOURCE, imp.PY_COMPILED, imp.C_EXTENSION):
            return self.filename
        return None
367
368
try:
    import zipimport
    from zipimport import zipimporter

    def iter_zipimport_modules(importer, prefix=''):
        """Yield ``(prefix + name, ispkg)`` for modules in a zip archive.

        Entries are taken from zipimport's directory cache for
        importer.archive, restricted to those located under
        importer.prefix.  Each name is reported once.
        """
        entries = sorted(zipimport._zip_directory_cache[importer.archive])
        archive_prefix = importer.prefix
        skip = len(archive_prefix)
        emitted = set()
        import inspect
        for entry in entries:
            if not entry.startswith(archive_prefix):
                continue

            parts = entry[skip:].split(os.sep)
            depth = len(parts)

            # "<name>/__init__.py*" one level down marks <name> as a package
            if depth == 2 and parts[1].startswith('__init__.py'):
                package = parts[0]
                if package not in emitted:
                    emitted.add(package)
                    yield prefix + package, True

            if depth != 1:
                continue

            modname = inspect.getmodulename(parts[0])
            if modname == '__init__':
                continue

            if modname and '.' not in modname and modname not in emitted:
                emitted.add(modname)
                yield prefix + modname, False

    iter_importer_modules.register(zipimporter, iter_zipimport_modules)

except ImportError:
    pass
405
406
def get_importer(path_item):
    """Return a finder for the given path item, or None.

    sys.path_importer_cache is consulted first.  On a miss, each hook in
    sys.path_hooks is tried in order; the finder from the first hook that
    does not raise ImportError is stored in the cache and returned.  None
    is returned when every hook declines.

    The cache (or part of it) can be cleared manually if a
    rescan of sys.path_hooks is necessary.
    """
    path_item = os.fsdecode(path_item)
    if path_item in sys.path_importer_cache:
        return sys.path_importer_cache[path_item]

    for hook in sys.path_hooks:
        try:
            importer = hook(path_item)
            sys.path_importer_cache.setdefault(path_item, importer)
        except ImportError:
            continue
        return importer
    return None
430
431
def iter_importers(fullname=""):
    """Yield finders for the given module name.

    For a dotted fullname, the containing package is imported (a side
    effect of calling this function) and a finder is yielded for every
    entry of its __path__; nothing is yielded if it has no __path__.

    For an undotted or empty fullname, the entries of sys.meta_path are
    yielded first, followed by a finder for every entry of sys.path.

    Relative names (starting with '.') raise ImportError.
    """
    if fullname.startswith('.'):
        raise ImportError(
            "Relative module name {!r} not supported".format(fullname))

    if '.' not in fullname:
        yield from sys.meta_path
        search_path = sys.path
    else:
        # The search path is the containing package's __path__
        parent_name = fullname.rpartition(".")[0]
        parent = importlib.import_module(parent_name)
        search_path = getattr(parent, '__path__', None)
        if search_path is None:
            return

    for entry in search_path:
        yield get_importer(entry)
459
460
def get_loader(module_or_name):
    """Get a "loader" object for module_or_name.

    The argument may be a module object or a module name.  A name found in
    sys.modules is replaced by the module stored there (None is returned
    if that entry is None).  For a module object, its __loader__ is
    returned when set; a module with neither __loader__ nor __spec__
    yields None.  Everything else is resolved through find_loader().
    """
    if module_or_name in sys.modules:
        module_or_name = sys.modules[module_or_name]
        if module_or_name is None:
            return None

    if not isinstance(module_or_name, ModuleType):
        return find_loader(module_or_name)

    module = module_or_name
    loader = getattr(module, '__loader__', None)
    if loader is not None:
        return loader
    if getattr(module, '__spec__', None) is None:
        return None
    return find_loader(module.__name__)
483
484
def find_loader(fullname):
    """Find a "loader" object for fullname.

    Backwards compatibility wrapper around importlib.util.find_spec():
    ImportError, AttributeError, TypeError and ValueError raised by the
    lookup are re-raised as ImportError, and only the spec's loader is
    returned (None when no spec is found).  Relative names raise
    ImportError.
    """
    if fullname.startswith('.'):
        raise ImportError(
            "Relative module name {!r} not supported".format(fullname))

    try:
        spec = importlib.util.find_spec(fullname)
    except (ImportError, AttributeError, TypeError, ValueError) as ex:
        # importlib raises other error types in situations where pkgutil
        # historically raised ImportError; normalise them here.
        msg = "Error while finding loader for {!r} ({}: {})"
        raise ImportError(msg.format(fullname, type(ex), ex)) from ex

    if spec is None:
        return None
    return spec.loader
504
505
def extend_path(path, name):
    """Extend a package's path.

    Intended use is to place the following code in a package's __init__.py:

        from pkgutil import extend_path
        __path__ = extend_path(__path__, __name__)

    For a top-level package the directories on sys.path are searched; for
    a subpackage, the __path__ of its already-imported parent is searched
    (if the parent is not in sys.modules or has no __path__, a plain copy
    of the input is returned).  Non-string search entries are skipped.

    For every search entry, the entry's finder is asked for the locations
    of the package, and each location not already present is appended.

    In addition, a file called "<name>.pkg" inside a search entry is read
    line by line.  Blank lines and lines starting with '#' are skipped;
    every other line is appended as-is, without checking that it exists
    on the filesystem.  A .pkg file that cannot be opened is reported on
    sys.stderr and otherwise ignored.

    If the input path is not a list (as is the case for frozen packages)
    it is returned unchanged.  The input path is never modified; an
    extended copy is returned, with new items appended at the end.
    """
    if not isinstance(path, list):
        # E.g. when called from inside a frozen package: hand the value
        # back untouched.
        return path

    extended = path[:]  # work on a copy of the existing path
    pkg_filename = name + ".pkg"

    parent_package, _, final_name = name.rpartition('.')
    if not parent_package:
        search_path = sys.path
    else:
        try:
            search_path = sys.modules[parent_package].__path__
        except (KeyError, AttributeError):
            # The parent is not imported or is not a package, so there is
            # nowhere to search.
            return extended

    for entry in search_path:
        if not isinstance(entry, str):
            continue

        finder = get_importer(entry)
        if finder is not None:
            portions = []
            if hasattr(finder, 'find_spec'):
                spec = finder.find_spec(final_name)
                if spec is not None:
                    portions = spec.submodule_search_locations or []
            # Otherwise fall back to the PEP 420 find_loader() API
            elif hasattr(finder, 'find_loader'):
                _, portions = finder.find_loader(final_name)

            for portion in portions:
                # XXX This may still add duplicate entries to path on
                # case-insensitive filesystems
                if portion not in extended:
                    extended.append(portion)

        # XXX Is this the right thing for subpackages like zope.app?
        # It looks for a file named "zope.app.pkg"
        pkgfile = os.path.join(entry, pkg_filename)
        if not os.path.isfile(pkgfile):
            continue

        try:
            f = open(pkgfile)
        except OSError as msg:
            sys.stderr.write("Can't open %s: %s\n" %
                             (pkgfile, msg))
            continue

        with f:
            for line in f:
                line = line.rstrip('\n')
                if line and not line.startswith('#'):
                    extended.append(line)  # Don't check for existence!

    return extended
598
599
def get_data(package, resource):
    """Get a resource from a package.

    This is a wrapper round the PEP 302 loader get_data API. The package
    argument should be the name of a package, in standard module format
    (foo.bar). The resource argument should be in the form of a relative
    filename, using '/' as the path separator. The parent directory name '..'
    is not allowed, and nor is a rooted name (starting with a '/').

    The function returns whatever the loader's get_data() returns for the
    resource: a binary string holding its contents.

    For packages located in the filesystem, which have already been imported,
    this is the rough equivalent of

        d = os.path.dirname(sys.modules[package].__file__)
        data = open(os.path.join(d, resource), 'rb').read()

    None is returned when no spec is found for the package, when its loader
    is missing or has no get_data(), or when the loaded module has no
    __file__ attribute.
    """
    spec = importlib.util.find_spec(package)
    if spec is None:
        return None

    loader = spec.loader
    if loader is None or not hasattr(loader, 'get_data'):
        return None

    # XXX needs test
    module = sys.modules.get(package)
    if not module:
        module = importlib._bootstrap._load(spec)
    if module is None or not hasattr(module, '__file__'):
        return None

    # loader.get_data() expects an os.path style file name that starts with
    # the directory holding the package's __file__, so rebuild the resource
    # name in that form.
    segments = resource.split('/')
    package_dir = os.path.dirname(module.__file__)
    return loader.get_data(os.path.join(package_dir, *segments))
641
642
# Compiled lazily by resolve_name() on its first call.
_NAME_PATTERN = None

def resolve_name(name):
    """
    Resolve a name to an object.

    `name` is expected to be a string in one of the following formats,
    where W is shorthand for a valid Python identifier and dot stands for a
    literal period in these pseudo-regexes:

    W(.W)*
    W(.W)*:(W(.W)*)?

    The first form exists for backward compatibility only.  Some leading
    part of the dotted name is a package and the rest is an object inside
    it, possibly nested in other objects.  Since the split point cannot be
    determined by inspection, imports are attempted repeatedly, one more
    component at a time, until one fails.

    In the second form the single colon marks the split point: the dotted
    name on its left is imported, and the dotted name on its right is
    looked up attribute by attribute inside it.  Only one import is needed.
    If the name ends with the colon, the module object itself is returned.

    The function returns an object (which might be a module), or raises one
    of the following exceptions:

    ValueError - if `name` isn't in a recognised format
    ImportError - if an import failed when it shouldn't have
    AttributeError - if a failure occurred when traversing the object hierarchy
                     within the imported package to get to the desired object.
    """
    global _NAME_PATTERN
    if _NAME_PATTERN is None:
        # Lazy import to speedup Python startup time
        import re
        dotted_words = r'(?!\d)(\w+)(\.(?!\d)(\w+))*'
        _NAME_PATTERN = re.compile(f'^(?P<pkg>{dotted_words})'
                                   f'(?P<cln>:(?P<obj>{dotted_words})?)?$',
                                   re.UNICODE)

    match = _NAME_PATTERN.match(name)
    if not match:
        raise ValueError(f'invalid format: {name!r}')

    if match.group('cln'):
        # A colon is present: a single import does the job.
        mod = importlib.import_module(match.group('pkg'))
        attr_path = match.group('obj')
        parts = attr_path.split('.') if attr_path else []
    else:
        # No colon: probe for the package boundary one component at a time.
        parts = name.split('.')
        modname = parts.pop(0)
        # The first component *must* be a module/package.
        mod = importlib.import_module(modname)
        while parts:
            candidate = f'{modname}.{parts[0]}'
            try:
                mod = importlib.import_module(candidate)
            except ImportError:
                break
            del parts[0]
            modname = candidate

    # At this point mod is the imported module and parts lists the
    # attributes still to traverse (empty if only the module is wanted).
    target = mod
    for attr in parts:
        target = getattr(target, attr)
    return target
717