"""Utilities to support packages."""

from collections import namedtuple
from functools import singledispatch as simplegeneric
import importlib
import importlib.util
import importlib.machinery
import os
import os.path
import sys
from types import ModuleType
import warnings

__all__ = [
    'get_importer', 'iter_importers', 'get_loader', 'find_loader',
    'walk_packages', 'iter_modules', 'get_data',
    'ImpImporter', 'ImpLoader', 'read_code', 'extend_path',
    'ModuleInfo',
]


ModuleInfo = namedtuple('ModuleInfo', 'module_finder name ispkg')
ModuleInfo.__doc__ = 'A namedtuple with minimal info about a module.'


def _get_spec(finder, name):
    """Return the finder-specific module spec.

    Uses finder.find_spec(name) when the finder has that attribute.
    Otherwise falls back to finder.find_module(name) and wraps the
    resulting loader in a spec; returns None if no loader is found.
    """
    # Works with legacy finders.
    try:
        find_spec = finder.find_spec
    except AttributeError:
        loader = finder.find_module(name)
        if loader is None:
            return None
        return importlib.util.spec_from_loader(name, loader)
    else:
        return find_spec(name)


def read_code(stream):
    """Read a marshalled code object from a compiled-file stream.

    The first 4 bytes of 'stream' are compared against
    importlib.util.MAGIC_NUMBER; if they differ, None is returned.
    Otherwise the next 12 bytes are skipped and the remainder is
    unmarshalled and returned.
    """
    # This helper is needed in order for the PEP 302 emulation to
    # correctly handle compiled files
    import marshal

    magic = stream.read(4)
    if magic != importlib.util.MAGIC_NUMBER:
        return None

    stream.read(12)  # Skip rest of the header
    return marshal.load(stream)


def walk_packages(path=None, prefix='', onerror=None):
    """Yields ModuleInfo for all modules recursively
    on path, or, if path is None, all accessible modules.

    'path' should be either None or a list of paths to look for
    modules in.

    'prefix' is a string to output on the front of every module name
    on output.

    Note that this function must import all *packages* (NOT all
    modules!) on the given path, in order to access the __path__
    attribute to find submodules.

    'onerror' is a function which gets called with one argument (the
    name of the package which was being imported) if any exception
    occurs while trying to import a package.  If no onerror function is
    supplied, ImportErrors are caught and ignored, while all other
    exceptions are propagated, terminating the search.

    Examples:

    # list all modules python can access
    walk_packages()

    # list all submodules of ctypes
    walk_packages(ctypes.__path__, ctypes.__name__+'.')
    """

    # Path entries already descended into by *this* call.  Each recursive
    # call keeps its own set.
    seen = set()

    for info in iter_modules(path, prefix):
        yield info

        if info.ispkg:
            try:
                __import__(info.name)
            except ImportError:
                if onerror is not None:
                    onerror(info.name)
            except Exception:
                if onerror is not None:
                    onerror(info.name)
                else:
                    raise
            else:
                pkg_path = getattr(sys.modules[info.name],
                                   '__path__', None) or []

                # don't traverse path items we've seen before
                unseen = []
                for item in pkg_path:
                    if item not in seen:
                        seen.add(item)
                        unseen.append(item)

                yield from walk_packages(unseen, info.name + '.', onerror)


def iter_modules(path=None, prefix=''):
    """Yields ModuleInfo for all submodules on path,
    or, if path is None, all top-level modules on sys.path.

    'path' should be either None or a list of paths to look for
    modules in.

    'prefix' is a string to output on the front of every module name
    on output.

    Raises ValueError (on first iteration) if 'path' is a string.
    """
    if path is None:
        importers = iter_importers()
    elif isinstance(path, str):
        raise ValueError("path must be None or list of paths to look for "
                         "modules in")
    else:
        importers = map(get_importer, path)

    # Only the first importer that reports a given name gets to yield it.
    yielded = set()
    for i in importers:
        for name, ispkg in iter_importer_modules(i, prefix):
            if name not in yielded:
                yielded.add(name)
                yield ModuleInfo(i, name, ispkg)


@simplegeneric
def iter_importer_modules(importer, prefix=''):
    """Return an iterable of (name, ispkg) pairs for 'importer'.

    This is the generic fallback: it delegates to
    importer.iter_modules(prefix) when that attribute exists and returns
    an empty list otherwise.  Type-specific implementations are attached
    with iter_importer_modules.register().
    """
    if not hasattr(importer, 'iter_modules'):
        return []
    return importer.iter_modules(prefix)


def _iter_directory_modules(dirpath, prefix):
    """Yield (prefix + name, ispkg) for modules found directly in 'dirpath'.

    Shared by the FileFinder walker and ImpImporter.iter_modules().
    Yields nothing when 'dirpath' is None or is not a directory.
    """
    if dirpath is None or not os.path.isdir(dirpath):
        return

    yielded = set()
    import inspect
    try:
        filenames = os.listdir(dirpath)
    except OSError:
        # ignore unreadable directories like import does
        filenames = []
    filenames.sort()  # handle packages before same-named modules

    for fn in filenames:
        modname = inspect.getmodulename(fn)
        if modname == '__init__' or modname in yielded:
            continue

        path = os.path.join(dirpath, fn)
        ispkg = False

        if not modname and os.path.isdir(path) and '.' not in fn:
            # A directory counts as a package only if it directly contains
            # some file that inspect recognises as an '__init__' module.
            modname = fn
            try:
                dircontents = os.listdir(path)
            except OSError:
                # ignore unreadable directories like import does
                dircontents = []
            if not any(inspect.getmodulename(entry) == '__init__'
                       for entry in dircontents):
                continue    # not a package
            ispkg = True

        if modname and '.' not in modname:
            yielded.add(modname)
            yield prefix + modname, ispkg


# Implement a file walker for the normal importlib path hook
def _iter_file_finder_modules(importer, prefix=''):
    """Yield (name, ispkg) pairs for the directory at importer.path."""
    # 'yield from' keeps this a generator, so importer.path is not read
    # until the caller starts iterating.
    yield from _iter_directory_modules(importer.path, prefix)

iter_importer_modules.register(
    importlib.machinery.FileFinder, _iter_file_finder_modules)


def _import_imp():
    """Bind the module-level name 'imp' to the 'imp' module.

    DeprecationWarning is suppressed while the import runs.
    """
    global imp
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', DeprecationWarning)
        imp = importlib.import_module('imp')

class ImpImporter:
    """PEP 302 Finder that wraps Python's "classic" import algorithm

    ImpImporter(dirname) produces a PEP 302 finder that searches that
    directory.  ImpImporter(None) produces a PEP 302 finder that searches
    the current sys.path, plus any modules that are frozen or built-in.

    Note that ImpImporter does not currently support being used by placement
    on sys.meta_path.
    """

    def __init__(self, path=None):
        """Emit a DeprecationWarning, load 'imp' and remember 'path'."""
        warnings.warn("This emulation is deprecated, use 'importlib' instead",
                      DeprecationWarning)
        _import_imp()
        self.path = path

    def find_module(self, fullname, path=None):
        """Return an ImpLoader for 'fullname', or None if it is not found.

        Only the last dotted component of 'fullname' is looked up.  A
        dotted name is never found when self.path is None.
        """
        # Note: we ignore 'path' argument since it is only used via meta_path
        subname = fullname.split(".")[-1]
        if subname != fullname and self.path is None:
            return None
        if self.path is None:
            path = None
        else:
            path = [os.path.realpath(self.path)]
        try:
            file, filename, etc = imp.find_module(subname, path)
        except ImportError:
            return None
        return ImpLoader(fullname, file, filename, etc)

    def iter_modules(self, prefix=''):
        """Yield (prefix + name, ispkg) for modules directly in self.path.

        Yields nothing when self.path is None or is not a directory.
        """
        yield from _iter_directory_modules(self.path, prefix)


class ImpLoader:
    """PEP 302 Loader that wraps Python's "classic" import algorithm

    Instances are built from the (file, filename, etc) triple returned by
    imp.find_module(); etc[2] is compared against the imp module-type
    constants (PY_SOURCE, PY_COMPILED, C_EXTENSION, PKG_DIRECTORY).
    """
    # Class-level defaults; get_code()/get_source() cache their results
    # on the instance under these names.
    code = source = None

    def __init__(self, fullname, file, filename, etc):
        """Emit a DeprecationWarning, load 'imp' and store the arguments."""
        warnings.warn("This emulation is deprecated, use 'importlib' instead",
                      DeprecationWarning)
        _import_imp()
        self.file = file
        self.filename = filename
        self.fullname = fullname
        self.etc = etc

    def load_module(self, fullname):
        """Load and return the module via imp.load_module().

        self.file is reopened first if it was closed, and is closed again
        afterwards whether or not loading succeeded.
        """
        self._reopen()
        try:
            mod = imp.load_module(fullname, self.file, self.filename, self.etc)
        finally:
            if self.file:
                self.file.close()
        # Note: we don't set __loader__ because we want the module to look
        # normal; i.e. this is just a wrapper for standard import machinery
        return mod

    def get_data(self, pathname):
        """Return the contents of the file 'pathname' as bytes."""
        with open(pathname, "rb") as file:
            return file.read()

    def _reopen(self):
        """Reopen self.file if it exists but has been closed.

        Source files are opened in text mode; compiled files and C
        extensions in binary mode.  Other module types are left alone.
        """
        if self.file and self.file.closed:
            mod_type = self.etc[2]
            if mod_type == imp.PY_SOURCE:
                self.file = open(self.filename, 'r')
            elif mod_type in (imp.PY_COMPILED, imp.C_EXTENSION):
                self.file = open(self.filename, 'rb')

    def _fix_name(self, fullname):
        """Return the module name this loader should act on.

        None means "use self.fullname".  Any other name that differs from
        self.fullname raises ImportError.
        """
        if fullname is None:
            fullname = self.fullname
        elif fullname != self.fullname:
            raise ImportError("Loader for module %s cannot handle "
                              "module %s" % (self.fullname, fullname))
        return fullname

    def is_package(self, fullname):
        """Return True if this loader's module type is PKG_DIRECTORY."""
        fullname = self._fix_name(fullname)
        return self.etc[2] == imp.PKG_DIRECTORY

    def get_code(self, fullname=None):
        """Return the module's code object, computing it on first use.

        Source modules are compiled from get_source(); compiled modules
        are read with read_code(); packages delegate to the loader of
        their '__init__'.  For any other module type the result is None.
        """
        fullname = self._fix_name(fullname)
        if self.code is None:
            mod_type = self.etc[2]
            if mod_type == imp.PY_SOURCE:
                source = self.get_source(fullname)
                self.code = compile(source, self.filename, 'exec')
            elif mod_type == imp.PY_COMPILED:
                self._reopen()
                try:
                    self.code = read_code(self.file)
                finally:
                    self.file.close()
            elif mod_type == imp.PKG_DIRECTORY:
                self.code = self._get_delegate().get_code()
        return self.code

    def get_source(self, fullname=None):
        """Return the module's source text, reading it on first use.

        For any module type not handled below, or a compiled module whose
        source file is missing, the result is None.
        """
        fullname = self._fix_name(fullname)
        if self.source is None:
            mod_type = self.etc[2]
            if mod_type == imp.PY_SOURCE:
                self._reopen()
                try:
                    self.source = self.file.read()
                finally:
                    self.file.close()
            elif mod_type == imp.PY_COMPILED:
                # Drop the last character of the compiled file's name to
                # get the candidate source path (presumably '.pyc' ->
                # '.py' -- confirm for other suffixes).
                if os.path.exists(self.filename[:-1]):
                    with open(self.filename[:-1], 'r') as f:
                        self.source = f.read()
            elif mod_type == imp.PKG_DIRECTORY:
                self.source = self._get_delegate().get_source()
        return self.source

    def _get_delegate(self):
        """Return the loader for '__init__' inside self.filename."""
        finder = ImpImporter(self.filename)
        spec = _get_spec(finder, '__init__')
        return spec.loader

    def get_filename(self, fullname=None):
        """Return the file name for the module, or None if it has none.

        Packages delegate to the loader of their '__init__'.
        """
        fullname = self._fix_name(fullname)
        mod_type = self.etc[2]
        if mod_type == imp.PKG_DIRECTORY:
            return self._get_delegate().get_filename()
        elif mod_type in (imp.PY_SOURCE, imp.PY_COMPILED, imp.C_EXTENSION):
            return self.filename
        return None


try:
    import zipimport
    from zipimport import zipimporter

    def iter_zipimport_modules(importer, prefix=''):
        """Yield (prefix + name, ispkg) for modules in a zipimporter.

        Entries are taken from zipimport._zip_directory_cache for
        importer.archive and limited to those under importer.prefix.
        """
        dirlist = sorted(zipimport._zip_directory_cache[importer.archive])
        _prefix = importer.prefix
        plen = len(_prefix)
        yielded = set()
        import inspect
        for fn in dirlist:
            if not fn.startswith(_prefix):
                continue

            parts = fn[plen:].split(os.sep)

            # "<name>/__init__.py*" marks <name> as a package.
            if len(parts) == 2 and parts[1].startswith('__init__.py'):
                if parts[0] not in yielded:
                    yielded.add(parts[0])
                    yield prefix + parts[0], True

            if len(parts) != 1:
                continue

            modname = inspect.getmodulename(parts[0])
            if modname == '__init__':
                continue

            if modname and '.' not in modname and modname not in yielded:
                yielded.add(modname)
                yield prefix + modname, False

    iter_importer_modules.register(zipimporter, iter_zipimport_modules)

except ImportError:
    pass


def get_importer(path_item):
    """Retrieve a finder for the given path item

    The returned finder is cached in sys.path_importer_cache
    if it was newly created by a path hook.

    The cache (or part of it) can be cleared manually if a
    rescan of sys.path_hooks is necessary.

    Returns None (without caching it) if every hook in sys.path_hooks
    raises ImportError for the item.
    """
    try:
        importer = sys.path_importer_cache[path_item]
    except KeyError:
        for path_hook in sys.path_hooks:
            try:
                importer = path_hook(path_item)
                sys.path_importer_cache.setdefault(path_item, importer)
                break
            except ImportError:
                pass
        else:
            importer = None
    return importer


def iter_importers(fullname=""):
    """Yield finders for the given module name

    If fullname contains a '.', the finders will be for the package
    containing fullname, otherwise they will be all registered top level
    finders (i.e. those on both sys.meta_path and sys.path_hooks).

    If the named module is in a package, that package is imported as a side
    effect of invoking this function.

    If no module name is specified, all top level finders are produced.
    """
    if fullname.startswith('.'):
        msg = "Relative module name {!r} not supported".format(fullname)
        raise ImportError(msg)
    if '.' in fullname:
        # Get the containing package's __path__
        pkg_name = fullname.rpartition(".")[0]
        pkg = importlib.import_module(pkg_name)
        path = getattr(pkg, '__path__', None)
        if path is None:
            return
    else:
        yield from sys.meta_path
        path = sys.path
    for item in path:
        yield get_importer(item)


def get_loader(module_or_name):
    """Get a "loader" object for module_or_name

    Returns None if the module cannot be found or imported.
    If the named module is not already imported, its containing package
    (if any) is imported, in order to establish the package __path__.
    """
    if module_or_name in sys.modules:
        module_or_name = sys.modules[module_or_name]
        if module_or_name is None:
            return None
    if isinstance(module_or_name, ModuleType):
        module = module_or_name
        loader = getattr(module, '__loader__', None)
        if loader is not None:
            return loader
        if getattr(module, '__spec__', None) is None:
            return None
        fullname = module.__name__
    else:
        fullname = module_or_name
    return find_loader(fullname)


def find_loader(fullname):
    """Find a "loader" object for fullname

    This is a backwards compatibility wrapper around
    importlib.util.find_spec that converts most failures to ImportError
    and only returns the loader rather than the full spec
    """
    if fullname.startswith('.'):
        msg = "Relative module name {!r} not supported".format(fullname)
        raise ImportError(msg)
    try:
        spec = importlib.util.find_spec(fullname)
    except (ImportError, AttributeError, TypeError, ValueError) as ex:
        # This hack fixes an impedance mismatch between pkgutil and
        # importlib, where the latter raises other errors for cases where
        # pkgutil previously raised ImportError
        msg = "Error while finding loader for {!r} ({}: {})"
        raise ImportError(msg.format(fullname, type(ex), ex)) from ex
    return spec.loader if spec is not None else None


def extend_path(path, name):
    """Extend a package's path.

    Intended use is to place the following code in a package's __init__.py:

        from pkgutil import extend_path
        __path__ = extend_path(__path__, __name__)

    This will add to the package's __path__ all subdirectories of
    directories on sys.path named after the package.  This is useful
    if one wants to distribute different parts of a single logical
    package as multiple directories.

    It also looks for *.pkg files where * matches the name argument.
    This feature is similar to *.pth files (see site.py), except that
    it doesn't special-case lines starting with 'import'.  A *.pkg
    file is trusted at face value: every entry found in a *.pkg file
    (other than blank lines and lines starting with '#') is appended
    to the path, without checking for duplicates or for whether it
    exists on the filesystem.  (This is a feature.)

    If the input path is not a list (as is the case for frozen
    packages) it is returned unchanged.  The input path is not
    modified; an extended copy is returned.  Items are only appended
    to the copy at the end.

    It is assumed that sys.path is a sequence.  Items of sys.path that
    are not (unicode or 8-bit) strings referring to existing
    directories are ignored.  Unicode items of sys.path that cause
    errors when used as filenames may cause this function to raise an
    exception (in line with os.path.isdir() behavior).
    """

    if not isinstance(path, list):
        # This could happen e.g. when this is called from inside a
        # frozen package.  Return the path unchanged in that case.
        return path

    sname_pkg = name + ".pkg"

    path = path[:]  # Start with a copy of the existing path

    parent_package, _, final_name = name.rpartition('.')
    if parent_package:
        try:
            search_path = sys.modules[parent_package].__path__
        except (KeyError, AttributeError):
            # We can't do anything: find_loader() returns None when
            # passed a dotted name.
            return path
    else:
        search_path = sys.path

    for dirname in search_path:
        if not isinstance(dirname, str):
            continue

        finder = get_importer(dirname)
        if finder is not None:
            portions = []
            if hasattr(finder, 'find_spec'):
                spec = finder.find_spec(final_name)
                if spec is not None:
                    portions = spec.submodule_search_locations or []
            # Is this finder PEP 420 compliant?
            elif hasattr(finder, 'find_loader'):
                _, portions = finder.find_loader(final_name)

            for portion in portions:
                # XXX This may still add duplicate entries to path on
                # case-insensitive filesystems
                if portion not in path:
                    path.append(portion)

        # XXX Is this the right thing for subpackages like zope.app?
        # It looks for a file named "zope.app.pkg"
        pkgfile = os.path.join(dirname, sname_pkg)
        if os.path.isfile(pkgfile):
            try:
                f = open(pkgfile)
            except OSError as msg:
                sys.stderr.write("Can't open %s: %s\n" %
                                 (pkgfile, msg))
            else:
                with f:
                    for line in f:
                        line = line.rstrip('\n')
                        if not line or line.startswith('#'):
                            continue
                        path.append(line)  # Don't check for existence!

    return path


def get_data(package, resource):
    """Get a resource from a package.

    This is a wrapper round the PEP 302 loader get_data API. The package
    argument should be the name of a package, in standard module format
    (foo.bar). The resource argument should be in the form of a relative
    filename, using '/' as the path separator. The parent directory name '..'
    is not allowed, and nor is a rooted name (starting with a '/').

    The function returns a binary string, which is the contents of the
    specified resource.

    For packages located in the filesystem, which have already been imported,
    this is the rough equivalent of

        d = os.path.dirname(sys.modules[package].__file__)
        data = open(os.path.join(d, resource), 'rb').read()

    If the package cannot be located or loaded, or it uses a PEP 302 loader
    which does not support get_data(), then None is returned.
    """

    spec = importlib.util.find_spec(package)
    if spec is None:
        return None
    loader = spec.loader
    if loader is None or not hasattr(loader, 'get_data'):
        return None
    # XXX needs test
    mod = (sys.modules.get(package) or
           importlib._bootstrap._load(spec))
    if mod is None or not hasattr(mod, '__file__'):
        return None

    # Modify the resource name to be compatible with the loader.get_data
    # signature - an os.path format "filename" starting with the dirname of
    # the package's __file__
    parts = resource.split('/')
    parts.insert(0, os.path.dirname(mod.__file__))
    resource_name = os.path.join(*parts)
    return loader.get_data(resource_name)