1"""Utilities to support packages."""
2
3# NOTE: This module must remain compatible with Python 2.3, as it is shared
4# by setuptools for distribution with Python 2.3 and up.
5
6import os
7import sys
8import imp
9import os.path
10from types import ModuleType
11from org.python.core import imp as _imp, BytecodeLoader
12
# Names exported by ``from pkgutil import *``.  get_data is part of the
# public API defined in this module, so it is exported with the rest.
__all__ = [
    'get_importer', 'iter_importers', 'get_loader', 'find_loader',
    'walk_packages', 'iter_modules', 'get_data',
    'ImpImporter', 'ImpLoader', 'read_code', 'extend_path',
]
18
19
# Equivalent to CPythonLib's pkgutil.read_code, except that different
# arguments have to be passed to the underlying imp implementation,
# which is accessed here as _imp.
23
def read_jython_code(fullname, file, filename):
    """Return a code object for `fullname` built from compiled bytecode.

    The raw data is fetched with _imp.readCode(filename, file, False) and
    handed to BytecodeLoader.makeCode under the name fullname + "$py".
    """
    bytecode = _imp.readCode(filename, file, False)
    class_name = fullname + "$py"
    return BytecodeLoader.makeCode(class_name, bytecode, filename)

# read_code is the name listed in __all__; it is an alias of the function above.
read_code = read_jython_code
29
def simplegeneric(func):
    """Turn `func` into a trivial single-dispatch generic function.

    The returned wrapper dispatches on the class of its first positional
    argument: the classes in that class's MRO are tried in order and the
    first one with a registered implementation handles the call.  When no
    class matches, `func` itself is called.

    Implementations are added with wrapper.register(typ, impl); calling
    wrapper.register(typ) with a single argument returns a one-argument
    callable that registers whatever it is given and returns it.
    """
    handlers = {}

    def register(typ, func=None):
        if func is not None:
            handlers[typ] = func
            return func
        # One-argument form: hand back a registrar bound to `typ`.
        return lambda f: register(typ, f)

    def wrapper(*args, **kw):
        subject = args[0]
        try:
            klass = subject.__class__
        except AttributeError:
            klass = type(subject)
        try:
            search_order = klass.__mro__
        except AttributeError:
            # No __mro__ on this class: derive a throwaway subclass that also
            # inherits from object and use its MRO minus the subclass itself.
            try:
                class shim(klass, object):
                    pass
            except TypeError:
                # must be an ExtensionClass or some such  :(
                search_order = (object,)
            else:
                search_order = shim.__mro__[1:]
        for candidate in search_order:
            if candidate in handlers:
                return handlers[candidate](*args, **kw)
        return func(*args, **kw)

    try:
        wrapper.__name__ = func.__name__
    except (TypeError, AttributeError):
        # Python 2.3 doesn't allow functions to be renamed
        pass

    # The wrapper shares (not copies) func's attribute dictionary, so the
    # 'register' attribute set below is visible on func as well.
    wrapper.__dict__ = func.__dict__
    wrapper.__doc__ = func.__doc__
    wrapper.register = register
    return wrapper
68
69
def walk_packages(path=None, prefix='', onerror=None):
    """Yield (module_loader, name, ispkg) for every module found recursively
    on path, or for all accessible modules when path is None.

    'path' is either None or a list of paths to search for modules.

    'prefix' is a string placed in front of every module name that is
    yielded.

    Every *package* (NOT every module!) that is found gets imported, since
    its __path__ attribute is needed to locate its submodules.

    'onerror' is called with one argument (the name of the package being
    imported) when importing a package raises.  Without an onerror
    function, ImportErrors are caught and ignored, while any other
    exception propagates and ends the walk.

    Examples:

    # list all modules python can access
    walk_packages()

    # list all submodules of ctypes
    walk_packages(ctypes.__path__, ctypes.__name__+'.')
    """
    # Path entries already descended into by this invocation.  The dict is
    # created afresh on every call, including each recursive one.
    visited = {}

    for importer, name, ispkg in iter_modules(path, prefix):
        yield importer, name, ispkg

        if not ispkg:
            continue

        try:
            __import__(name)
        except ImportError:
            if onerror is not None:
                onerror(name)
            continue
        except Exception:
            if onerror is None:
                raise
            onerror(name)
            continue

        # don't traverse path items we've seen before
        subpaths = []
        for entry in getattr(sys.modules[name], '__path__', None) or []:
            if entry not in visited:
                visited[entry] = True
                subpaths.append(entry)

        for item in walk_packages(subpaths, name + '.', onerror):
            yield item
126
127
def iter_modules(path=None, prefix=''):
    """Yield (module_loader, name, ispkg) for each submodule on path, or
    for each top-level module on sys.path when path is None.

    'path' is either None or a list of paths to search for modules.

    'prefix' is a string placed in front of every module name that is
    yielded.

    A name reported by more than one importer is yielded only once, for
    the first importer that reports it.
    """
    if path is not None:
        importers = map(get_importer, path)
    else:
        importers = iter_importers()

    reported = {}
    for importer in importers:
        for name, ispkg in iter_importer_modules(importer, prefix):
            if name in reported:
                continue
            reported[name] = 1
            yield importer, name, ispkg
150
151
#@simplegeneric
def iter_importer_modules(importer, prefix=''):
    """Return the (name, ispkg) pairs that `importer` reports for `prefix`.

    This is the default implementation of the generic function: it
    delegates to importer.iter_modules(prefix) when the importer has such
    an attribute and returns an empty list otherwise.
    """
    if hasattr(importer, 'iter_modules'):
        return importer.iter_modules(prefix)
    return []

# Wrapped by hand rather than with the decorator line commented out above
# (presumably because decorator syntax is unavailable on Python 2.3).
iter_importer_modules = simplegeneric(iter_importer_modules)
159
160
class ImpImporter:
    """PEP 302 Importer that wraps Python's "classic" import algorithm

    ImpImporter(dirname) produces a PEP 302 importer that searches that
    directory.  ImpImporter(None) produces a PEP 302 importer that searches
    the current sys.path, plus any modules that are frozen or built-in.

    Note that ImpImporter does not currently support being used by placement
    on sys.meta_path.
    """

    def __init__(self, path=None):
        # Directory to search, or None to defer to imp's default search.
        self.path = path

    def find_module(self, fullname, path=None):
        """Return an ImpLoader for `fullname`, or None if it is not found.

        Only the last dotted component of `fullname` is looked up.  A dotted
        name can never be resolved by an importer created without a path, so
        None is returned for that combination.
        """
        # Note: we ignore 'path' argument since it is only used via meta_path
        subname = fullname.split(".")[-1]
        if subname != fullname and self.path is None:
            return None
        if self.path is None:
            path = None
        else:
            path = [os.path.realpath(self.path)]
        try:
            file, filename, etc = imp.find_module(subname, path)
        except ImportError:
            return None
        return ImpLoader(fullname, file, filename, etc)

    def iter_modules(self, prefix=''):
        """Yield (prefix + name, ispkg) for the modules directly in self.path.

        Nothing is yielded when self.path is None or is not a directory.
        A sub-directory counts as a package when its name contains no '.'
        and it holds an __init__ module.  Each name is yielded at most once;
        because entries are visited in sorted order, a package wins over a
        same-named module file.

        Directories that cannot be listed (os.listdir raising OSError) are
        treated as empty instead of aborting the whole scan.
        """
        if self.path is None or not os.path.isdir(self.path):
            return

        yielded = {}
        import inspect

        try:
            filenames = os.listdir(self.path)
        except OSError:
            # ignore unreadable directories like import does
            filenames = []
        filenames.sort()  # handle packages before same-named modules

        for fn in filenames:
            modname = inspect.getmodulename(fn)
            if modname == '__init__' or modname in yielded:
                continue

            path = os.path.join(self.path, fn)
            ispkg = False

            if not modname and os.path.isdir(path) and '.' not in fn:
                modname = fn
                try:
                    dircontents = os.listdir(path)
                except OSError:
                    # ignore unreadable directories like import does
                    dircontents = []
                # A separate loop variable keeps the outer 'fn' intact.
                for entry in dircontents:
                    if inspect.getmodulename(entry) == '__init__':
                        ispkg = True
                        break
                else:
                    continue    # not a package

            if modname and '.' not in modname:
                yielded[modname] = 1
                yield prefix + modname, ispkg
221
222
class ImpLoader:
    """PEP 302 Loader that wraps Python's "classic" import algorithm

    An instance stores the module name it was created for together with
    the file object, file name and description tuple it was given (in this
    module those come from imp.find_module).  Element 2 of the description
    tuple is compared against the imp module-type constants.
    """
    # Class-level defaults; get_code/get_source store their results on the
    # instance once a value has been obtained.
    code = None
    source = None

    def __init__(self, fullname, file, filename, etc):
        self.fullname = fullname
        self.file = file
        self.filename = filename
        self.etc = etc

    def load_module(self, fullname):
        """Load the module with imp.load_module and return it.

        The stored file is reopened first if necessary and is closed again
        afterwards whether or not loading succeeded.
        """
        self._reopen()
        try:
            module = imp.load_module(fullname, self.file, self.filename,
                                     self.etc)
        finally:
            if self.file:
                self.file.close()
        # Note: we don't set __loader__ because we want the module to look
        # normal; i.e. this is just a wrapper for standard import machinery
        return module

    def get_data(self, pathname):
        """Return the contents of the file at `pathname`, read in binary mode."""
        stream = open(pathname, "rb")
        try:
            return stream.read()
        finally:
            stream.close()

    def _reopen(self):
        """Reopen self.file if it is set but has been closed.

        Source files are opened in 'rU' mode, compiled files and C
        extensions in 'rb' mode; other module types are left untouched.
        """
        if not (self.file and self.file.closed):
            return
        kind = self.etc[2]
        if kind == imp.PY_SOURCE:
            self.file = open(self.filename, 'rU')
        elif kind in (imp.PY_COMPILED, imp.C_EXTENSION):
            self.file = open(self.filename, 'rb')

    def _fix_name(self, fullname):
        """Return the module name to operate on.

        None stands for the name this loader was created with; any other
        name that differs from it raises ImportError.
        """
        if fullname is None:
            return self.fullname
        if fullname != self.fullname:
            raise ImportError("Loader for module %s cannot handle "
                              "module %s" % (self.fullname, fullname))
        return fullname

    def is_package(self, fullname):
        """Return True if this loader's module is a package directory."""
        fullname = self._fix_name(fullname)
        return self.etc[2] == imp.PKG_DIRECTORY

    def get_code(self, fullname=None):
        """Return the module's code object, computing it on first use.

        Source modules are compiled from get_source(), compiled modules are
        read with read_jython_code, and packages delegate to the loader of
        their __init__ module.  For any other module type the stored value
        (None by default) is returned unchanged.
        """
        fullname = self._fix_name(fullname)
        if self.code is not None:
            return self.code
        kind = self.etc[2]
        if kind == imp.PY_SOURCE:
            text = self.get_source(fullname)
            self.code = compile(text, self.filename, 'exec')
        elif kind == imp.PY_COMPILED:
            self._reopen()
            try:
                self.code = read_jython_code(fullname, self.file,
                                             self.filename)
            finally:
                self.file.close()
        elif kind == imp.PKG_DIRECTORY:
            self.code = self._get_delegate().get_code()
        return self.code

    def get_source(self, fullname=None):
        """Return the module's source text, reading it on first use.

        For a compiled module the source is looked for at the compiled
        file's name minus its last character; if no such file exists the
        result stays None.  Packages delegate to the loader of their
        __init__ module.
        """
        fullname = self._fix_name(fullname)
        if self.source is not None:
            return self.source
        kind = self.etc[2]
        if kind == imp.PY_SOURCE:
            self._reopen()
            try:
                self.source = self.file.read()
            finally:
                self.file.close()
        elif kind == imp.PY_COMPILED:
            source_path = self.filename[:-1]
            if os.path.exists(source_path):
                handle = open(source_path, 'rU')
                try:
                    self.source = handle.read()
                finally:
                    handle.close()
        elif kind == imp.PKG_DIRECTORY:
            self.source = self._get_delegate().get_source()
        return self.source

    def _get_delegate(self):
        """Return the loader for the __init__ module inside this package."""
        return ImpImporter(self.filename).find_module('__init__')

    def get_filename(self, fullname=None):
        """Return the file name backing the module, or None if there is none.

        Packages report the file name of their __init__ module.
        """
        fullname = self._fix_name(fullname)
        kind = self.etc[2]
        if kind == imp.PKG_DIRECTORY:
            return self._get_delegate().get_filename()
        if kind in (imp.PY_SOURCE, imp.PY_COMPILED, imp.C_EXTENSION):
            return self.filename
        return None
322
323
try:
    import zipimport
    from zipimport import zipimporter

    def iter_zipimport_modules(importer, prefix=''):
        """Yield (prefix + name, ispkg) for the modules a zipimporter exposes.

        The archive's entries are taken from zipimport's directory cache and
        restricted to those below importer.prefix.  A directory directly
        below that prefix containing an __init__.py* file is reported as a
        package; a module file directly below it is reported as a plain
        module.  Each name is yielded at most once.
        """
        # list(...) + sort() works whether the cache's keys() is a list or
        # a view, and needs nothing newer than Python 2.3.
        dirlist = list(zipimport._zip_directory_cache[importer.archive])
        dirlist.sort()
        _prefix = importer.prefix
        plen = len(_prefix)
        yielded = {}
        import inspect
        for fn in dirlist:
            if not fn.startswith(_prefix):
                continue

            fn = fn[plen:].split(os.sep)

            if len(fn)==2 and fn[1].startswith('__init__.py'):
                if fn[0] not in yielded:
                    yielded[fn[0]] = 1
                    # Packages carry the caller's prefix just like plain
                    # modules do below.
                    yield prefix + fn[0], True

            if len(fn)!=1:
                continue

            modname = inspect.getmodulename(fn[0])
            if modname=='__init__':
                continue

            if modname and '.' not in modname and modname not in yielded:
                yielded[modname] = 1
                yield prefix + modname, False

    iter_importer_modules.register(zipimporter, iter_zipimport_modules)

except ImportError:
    # No zipimport support available: leave the generic function without a
    # zipimporter implementation.
    pass
361
362
def get_importer(path_item):
    """Retrieve a PEP 302 importer for the given path item

    A cached importer from sys.path_importer_cache is used when present.
    Otherwise each hook in sys.path_hooks is tried in order; the first one
    that does not raise ImportError supplies the importer, and the outcome
    (None when no hook accepted the item) is stored in the cache.

    If there is no importer, a wrapper around the basic import machinery
    is returned.  This wrapper is never inserted into the importer cache
    (None is inserted instead).

    The cache (or part of it) can be cleared manually if a rescan of
    sys.path_hooks is necessary.
    """
    cache = sys.path_importer_cache
    try:
        importer = cache[path_item]
    except KeyError:
        importer = None
        for hook in sys.path_hooks:
            try:
                importer = hook(path_item)
            except ImportError:
                pass
            else:
                break
        cache.setdefault(path_item, importer)

    if importer is not None:
        return importer

    # Fall back to the classic-import wrapper (never cached).
    try:
        return ImpImporter(path_item)
    except ImportError:
        return None
395
396
def iter_importers(fullname=""):
    """Yield PEP 302 importers for the given module name

    If fullname contains a '.', the importers will be for the package
    containing fullname, otherwise they will be importers for sys.meta_path,
    sys.path, and Python's "classic" import machinery, in that order.  If
    the named module is in a package, that package is imported as a side
    effect of invoking this function.

    A name starting with '.' raises ImportError.

    Non PEP 302 mechanisms (e.g. the Windows registry) used by the
    standard import machinery to find files in alternative locations
    are partially supported, but are searched AFTER sys.path. Normally,
    these locations are searched BEFORE sys.path, preventing sys.path
    entries from shadowing them.

    For this to cause a visible difference in behaviour, there must
    be a module or package name that is accessible via both sys.path
    and one of the non PEP 302 file system mechanisms. In this case,
    the emulation will find the former version, while the builtin
    import mechanism will find the latter.

    Items of the following types can be affected by this discrepancy:
        imp.C_EXTENSION, imp.PY_SOURCE, imp.PY_COMPILED, imp.PKG_DIRECTORY
    """
    if fullname.startswith('.'):
        raise ImportError("Relative module names not supported")

    in_package = '.' in fullname
    if in_package:
        # Everything before the last dot names the containing package,
        # whose __path__ is the search path.
        pkg = fullname[:fullname.rfind('.')]
        if pkg not in sys.modules:
            __import__(pkg)
        search_path = getattr(sys.modules[pkg], '__path__', None) or []
    else:
        for importer in sys.meta_path:
            yield importer
        search_path = sys.path

    for entry in search_path:
        yield get_importer(entry)

    if not in_package:
        yield ImpImporter()
437
def get_loader(module_or_name):
    """Get a PEP 302 "loader" object for module_or_name

    A key of sys.modules is first replaced by the module it maps to.  A
    module with a non-None __loader__ attribute yields that loader
    directly; in every other case the lookup is delegated to find_loader()
    with the module's name.

    Returns None if the module cannot be found or imported.  If the named
    module is not already imported, its containing package (if any) is
    imported, in order to establish the package __path__.

    This function uses iter_importers(), and is thus subject to the same
    limitations regarding platform-specific special import locations such
    as the Windows registry.
    """
    target = module_or_name
    if target in sys.modules:
        target = sys.modules[target]

    if not isinstance(target, ModuleType):
        if target == sys:
            # Jython sys is not a real module; fake it here for now since
            # making it a module requires a fair amount of decoupling from
            # PySystemState
            return find_loader("sys")
        return find_loader(target)

    existing = getattr(target, '__loader__', None)
    if existing is not None:
        return existing
    return find_loader(target.__name__)
467
def find_loader(fullname):
    """Find a PEP 302 "loader" object for fullname

    Each importer produced by iter_importers(fullname) is asked for the
    module via find_module(); the first non-None answer is returned.
    Returns None if no importer can find the module.  This function uses
    iter_importers(), and is thus subject to the same limitations regarding
    platform-specific special import locations such as the Windows registry.
    """
    for finder in iter_importers(fullname):
        found = finder.find_module(fullname)
        if found is None:
            continue
        return found
    return None
482
483
def extend_path(path, name):
    """Extend a package's path.

    Intended use is to place the following code in a package's __init__.py:

        from pkgutil import extend_path
        __path__ = extend_path(__path__, __name__)

    This will add to the package's __path__ all subdirectories of
    directories on sys.path named after the package.  This is useful
    if one wants to distribute different parts of a single logical
    package as multiple directories.

    It also looks for *.pkg files where * matches the name argument.
    This feature is similar to *.pth files (see site.py), except that it
    doesn't special-case lines starting with 'import'.  A *.pkg file is
    trusted at face value: apart from skipping blank lines and lines
    starting with '#', every entry found in a *.pkg file is appended to
    the path, regardless of whether it exists on the filesystem and
    without checking for duplicates.  (This is a feature.)

    If the input path is not a list (as is the case for frozen
    packages) it is returned unchanged.  The input path is not
    modified; an extended copy is returned.  Items are only appended
    to the copy at the end.

    It is assumed that sys.path is a sequence.  Items of sys.path that
    are not strings referring to existing directories are ignored.
    Unicode items of sys.path that cause errors when used as filenames
    may cause this function to raise an exception (in line with
    os.path.isdir() behavior).

    A *.pkg file that exists but cannot be opened is reported on
    sys.stderr and otherwise ignored.
    """

    if not isinstance(path, list):
        # This could happen e.g. when this is called from inside a
        # frozen package.  Return the path unchanged in that case.
        return path

    # basestring only exists on Python 2; fall back to str where it is
    # missing so the sys.path filter below works on any interpreter.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    pname = os.path.join(*name.split('.')) # Reconstitute as relative path
    # Just in case os.extsep != '.'
    sname = os.extsep.join(name.split('.'))
    sname_pkg = sname + os.extsep + "pkg"
    init_py = "__init__" + os.extsep + "py"

    path = path[:] # Start with a copy of the existing path

    for dir in sys.path:
        if not isinstance(dir, string_types) or not os.path.isdir(dir):
            continue
        subdir = os.path.join(dir, pname)
        # XXX This may still add duplicate entries to path on
        # case-insensitive filesystems
        initfile = os.path.join(subdir, init_py)
        if subdir not in path and os.path.isfile(initfile):
            path.append(subdir)
        # XXX Is this the right thing for subpackages like zope.app?
        # It looks for a file named "zope.app.pkg"
        pkgfile = os.path.join(dir, sname_pkg)
        if os.path.isfile(pkgfile):
            try:
                f = open(pkgfile)
            except IOError:
                # sys.exc_info() instead of "except IOError, msg": the
                # latter is a syntax error outside Python 2, while this
                # form is valid everywhere, including Python 2.3.
                msg = sys.exc_info()[1]
                sys.stderr.write("Can't open %s: %s\n" %
                                 (pkgfile, msg))
            else:
                try:
                    for line in f:
                        line = line.rstrip('\n')
                        if not line or line.startswith('#'):
                            continue
                        path.append(line) # Don't check for existence!
                finally:
                    f.close()

    return path
559
def get_data(package, resource):
    """Get a resource from a package.

    This is a wrapper round the PEP 302 loader get_data API. The package
    argument should be the name of a package, in standard module format
    (foo.bar). The resource argument should be in the form of a relative
    filename, using '/' as the path separator. The parent directory name '..'
    is not allowed, and nor is a rooted name (starting with a '/').

    The function returns a binary string, which is the contents of the
    specified resource.

    For packages located in the filesystem, which have already been imported,
    this is the rough equivalent of

        d = os.path.dirname(sys.modules[package].__file__)
        data = open(os.path.join(d, resource), 'rb').read()

    If the package cannot be located or loaded, or it uses a PEP 302 loader
    which does not support get_data(), then None is returned.
    """

    loader = get_loader(package)
    if loader is None or not hasattr(loader, 'get_data'):
        return None

    # Reuse the already-imported module when there is one; otherwise have
    # the loader import it.
    module = sys.modules.get(package)
    if not module:
        module = loader.load_module(package)
    if module is None or not hasattr(module, '__file__'):
        return None

    # loader.get_data expects an os.path style file name, so the '/'
    # separated resource is re-joined onto the directory of the package's
    # __file__.
    segments = [os.path.dirname(module.__file__)] + resource.split('/')
    return loader.get_data(os.path.join(*segments))
596