1# Copyright 2019, David Wilson 2# 3# Redistribution and use in source and binary forms, with or without 4# modification, are permitted provided that the following conditions are met: 5# 6# 1. Redistributions of source code must retain the above copyright notice, 7# this list of conditions and the following disclaimer. 8# 9# 2. Redistributions in binary form must reproduce the above copyright notice, 10# this list of conditions and the following disclaimer in the documentation 11# and/or other materials provided with the distribution. 12# 13# 3. Neither the name of the copyright holder nor the names of its contributors 14# may be used to endorse or promote products derived from this software without 15# specific prior written permission. 16# 17# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27# POSSIBILITY OF SUCH DAMAGE. 28 29# !mitogen: minify_safe 30 31""" 32This module implements functionality required by master processes, such as 33starting new contexts via SSH. Its size is also restricted, since it must 34be sent to any context that will be used to establish additional child 35contexts. 
def _stdlib_paths():
    """
    Return a set of paths from which Python imports the standard library.

    Candidates are ``<prefix>/lib/pythonX.Y`` for every interpreter prefix
    attribute present on :mod:`sys`, plus the ``DESTLIB`` configuration
    variable when :mod:`sysconfig` is importable and defines it.

    :return:
        Set of path strings. The set never contains :data:`None`.
    """
    attr_candidates = [
        'prefix',
        'real_prefix',  # virtualenv: only set inside a virtual environment.
        'base_prefix',  # venv: always set, equal to prefix if outside.
    ]
    prefixes = (getattr(sys, a, None) for a in attr_candidates)
    version = 'python%s.%s' % sys.version_info[0:2]
    s = set(os.path.abspath(os.path.join(p, 'lib', version))
            for p in prefixes if p is not None)

    # When running 'unit2 tests/module_finder_test.py' in a Py2 venv on Ubuntu
    # 18.10, above is insufficient to catch the real directory.
    if sysconfig is not None:
        destlib = sysconfig.get_config_var('DESTLIB')
        # get_config_var() returns None for variables the platform does not
        # define. Every member of this set is later handed to
        # os.path.commonprefix() by is_stdlib_path(), which cannot cope with
        # None, so only record a real path.
        if destlib:
            s.add(destlib)
    return s
def _py_filename(path):
    """
    Map a module file path onto the path of its Python source, if any.

    :param str path:
        Candidate path, typically a module's ``__file__``. May be empty or
        :data:`None`.

    :return:
        * `path` minus its final character when it ends in ``.pyc`` or
          ``.pyo``.
        * `path` unchanged when it ends in ``.py``, or when it exists on disk
          and :func:`_looks_like_script` accepts it.
        * :data:`None` in every other case.
    """
    if path:
        suffix = path[-4:]
        if suffix == '.pyc' or suffix == '.pyo':
            # Bytecode sits beside its source: dropping the last letter turns
            # "mod.pyc" / "mod.pyo" into "mod.py".
            path = path[:-1]

        if path.endswith('.py'):
            return path

        # Extensionless scripts are accepted only if they exist and contain
        # what appears to be text.
        if os.path.exists(path) and _looks_like_script(path):
            return path

    return None
# iter_opcodes(co) yields `(op, oparg)` tuples for the bytecode of the code
# object `co`. One implementation is chosen at import time to match the
# bytecode layout of the running interpreter.
if sys.version_info < (3, 0):
    def iter_opcodes(co):
        # Yield `(op, oparg)` tuples from the code object `co`.
        # co_code is passed through ord() to obtain integer opcodes; _getarg()
        # pulls the two argument bytes for opcodes >= HAVE_ARGUMENT, and
        # yields None as the oparg otherwise.
        ordit = imap(ord, co.co_code)
        nextb = ordit.next
        return ((c, _getarg(nextb, c)) for c in ordit)
elif sys.version_info < (3, 6):
    def iter_opcodes(co):
        # Yield `(op, oparg)` tuples from the code object `co`.
        # Same variable-width layout as above, but iterating co_code already
        # produces integers.
        ordit = iter(co.co_code)
        nextb = ordit.__next__
        return ((c, _getarg(nextb, c)) for c in ordit)
else:
    def iter_opcodes(co):
        # Yield `(op, oparg)` tuples from the code object `co`.
        # https://github.com/abarnert/cpython/blob/c095a32f/Python/wordcode.md
        #
        # Every instruction is an (opcode, 1-byte arg) pair. Arguments wider
        # than one byte are expressed by preceding the instruction with one or
        # more EXTENDED_ARG instructions carrying the high-order bytes. Those
        # prefixes are folded into the oparg of the instruction they modify
        # and are not yielded themselves, so consumers that index co_consts or
        # co_names with the oparg see the full index, and consumers matching
        # on adjacent instructions are not disturbed by the prefixes.
        ordit = iter(co.co_code)
        nextb = ordit.__next__
        extended = 0
        for c in ordit:
            arg = nextb() | extended
            if c == dis.EXTENDED_ARG:
                # Accumulate; the next instruction supplies the low byte.
                extended = arg << 8
            else:
                extended = 0
                yield c, arg
class ThreadWatcher(object):
    """
    Manage threads that wait for another thread to shut down, before invoking
    `on_join()` for each associated ThreadWatcher.

    In CPython it seems possible to use this method to ensure a non-main thread
    is signalled when the main thread has exited, using a third thread as a
    proxy.

    :param threading.Thread target:
        Thread whose exit is awaited.
    :param on_join:
        Callable invoked with no arguments, on the watcher thread, after
        `target` has been joined.
    """
    #: Protects remaining _cls_* members.
    _cls_lock = threading.Lock()

    #: PID of the process that last modified the class data. If the PID
    #: changes, it means the thread watch dict refers to threads that no longer
    #: exist in the current process (since it forked), and so must be reset.
    _cls_pid = None

    #: Map watched Thread -> list of ThreadWatcher instances.
    _cls_instances_by_target = {}

    #: Map watched Thread -> watcher Thread for each watched thread.
    _cls_thread_by_target = {}

    @classmethod
    def _reset(cls):
        """
        If we have forked since the watch dictionaries were initialized, all
        that they hold is garbage, so clear it. Callers hold `_cls_lock`.
        """
        if os.getpid() != cls._cls_pid:
            cls._cls_pid = os.getpid()
            cls._cls_instances_by_target.clear()
            cls._cls_thread_by_target.clear()

    def __init__(self, target, on_join):
        self.target = target
        self.on_join = on_join

    def _is_installed(self):
        """
        Return :data:`True` if this watcher is still registered for its
        target, i.e. it was installed and has not been removed since.
        """
        self._cls_lock.acquire()
        try:
            return self in self._cls_instances_by_target.get(self.target, ())
        finally:
            self._cls_lock.release()

    @classmethod
    def _watch(cls, target):
        """
        Body of the watcher thread: block until `target` exits, then invoke
        `on_join()` for every watcher still registered against it.
        """
        target.join()

        # Iterate over a snapshot taken under the lock. Iterating the live
        # list is unsafe: a callback that calls remove() shrinks the list
        # mid-iteration, which makes the loop skip the following watcher.
        cls._cls_lock.acquire()
        try:
            watchers = list(cls._cls_instances_by_target.get(target, ()))
        finally:
            cls._cls_lock.release()

        # The lock must not be held while calling on_join(): callbacks may
        # call remove(), which acquires the same non-reentrant lock.
        for watcher in watchers:
            # Honour removals performed by earlier callbacks.
            if watcher._is_installed():
                watcher.on_join()

    def install(self):
        """
        Register this watcher, starting the watcher thread for `target` if one
        has not already been started in this process.
        """
        self._cls_lock.acquire()
        try:
            self._reset()
            lst = self._cls_instances_by_target.setdefault(self.target, [])
            lst.append(self)
            if self.target not in self._cls_thread_by_target:
                self._cls_thread_by_target[self.target] = threading.Thread(
                    name='mitogen.master.join_thread_async',
                    target=self._watch,
                    args=(self.target,)
                )
                self._cls_thread_by_target[self.target].start()
        finally:
            self._cls_lock.release()

    def remove(self):
        """
        Unregister this watcher so `on_join()` is not invoked for it. Removing
        a watcher that is not registered is a no-op.
        """
        self._cls_lock.acquire()
        try:
            self._reset()
            lst = self._cls_instances_by_target.get(self.target, [])
            if self in lst:
                lst.remove(self)
        finally:
            self._cls_lock.release()

    @classmethod
    def watch(cls, target, on_join):
        """
        Construct a watcher for `target`, install it, and return it.
        """
        watcher = cls(target, on_join)
        watcher.install()
        return watcher
class FinderMethod(object):
    """
    Base interface for one strategy of locating a Python module or package,
    by name, as seen by the running interpreter. Concrete strategies subclass
    this and override :meth:`find`.
    """
    def __repr__(self):
        # Use the runtime class so subclasses are labelled with their own name.
        return type(self).__name__ + '()'

    def find(self, fullname):
        """
        Locate the module named `fullname`, a canonical module name as would
        be found in :data:`sys.modules`.

        :returns:
            :data:`None` if the module is not found, otherwise a
            `(path, source, is_pkg)` tuple, where:

            * `path`: Unicode string containing path to source file.
            * `source`: Bytestring containing source file's content.
            * `is_pkg`: :data:`True` if `fullname` is a package.

        :raises NotImplementedError:
            Always, in this base class.
        """
        raise NotImplementedError()
class SysModulesMethod(FinderMethod):
    """
    Attempt to fetch source code via :data:`sys.modules`. This was originally
    specifically to support :mod:`__main__`, but it may catch a few more cases.
    """
    def find(self, fullname):
        """
        Find `fullname` using the ``__file__`` attribute of the module object
        registered under that name in :data:`sys.modules`.

        :returns:
            `(path, source, is_pkg)` tuple, or :data:`None` if the entry is
            absent, is not a regular module, is registered under a name other
            than its own ``__name__``, or has no usable source path.
        """
        mod = sys.modules.get(fullname)
        if not isinstance(mod, types.ModuleType):
            LOG.debug('%r: sys.modules[%r] absent or not a regular module',
                      self, fullname)
            return None

        LOG.debug('_get_module_via_sys_modules(%r) -> %r', fullname, mod)
        reported_name = getattr(mod, '__name__', None)
        if reported_name != fullname:
            LOG.debug('sys.modules[%r].__name__ is incorrect, assuming '
                      'this is a hacky module alias and ignoring it. '
                      'Got %r, module object: %r',
                      fullname, reported_name, mod)
            return None

        filename = _py_filename(getattr(mod, '__file__', ''))
        if not filename:
            return None

        LOG.debug('%r: sys.modules[%r]: found %s', self, fullname, filename)
        is_pkg = hasattr(mod, '__path__')
        try:
            source = inspect.getsource(mod)
        except IOError:
            # Work around inspect.getsourcelines() bug for 0-byte __init__.py
            # files: only a package is allowed to fall back to blank source.
            if is_pkg:
                source = '\n'
            else:
                raise

        # The result tuple carries source as a bytestring, so encode text.
        if isinstance(source, mitogen.core.UnicodeType):
            source = source.encode('utf-8')

        return filename, source, is_pkg
608 * `search_path`: :data:`__path__` attribute of the least 609 indirect parent found, or :data:`None` if no indirect parent 610 was found. 611 * `modpath`: list of module name components leading from `path` 612 to the target module. 613 """ 614 path = None 615 modpath = [] 616 while True: 617 pkgname, _, modname = str_rpartition(to_text(fullname), u'.') 618 modpath.insert(0, modname) 619 if not pkgname: 620 return [], None, modpath 621 622 pkg = sys.modules.get(pkgname) 623 path = getattr(pkg, '__path__', None) 624 if pkg and path: 625 return pkgname.split('.'), path, modpath 626 627 LOG.debug('%r: %r lacks __path__ attribute', self, pkgname) 628 fullname = pkgname 629 630 def _found_package(self, fullname, path): 631 path = os.path.join(path, '__init__.py') 632 LOG.debug('%r: %r is PKG_DIRECTORY: %r', self, fullname, path) 633 return self._found_module( 634 fullname=fullname, 635 path=path, 636 fp=open(path, 'rb'), 637 is_pkg=True, 638 ) 639 640 def _found_module(self, fullname, path, fp, is_pkg=False): 641 try: 642 path = _py_filename(path) 643 if not path: 644 return 645 646 source = fp.read() 647 finally: 648 if fp: 649 fp.close() 650 651 if isinstance(source, mitogen.core.UnicodeType): 652 # get_source() returns "string" according to PEP-302, which was 653 # reinterpreted for Python 3 to mean a Unicode string. 654 source = source.encode('utf-8') 655 return path, source, is_pkg 656 657 def _find_one_component(self, modname, search_path): 658 try: 659 #fp, path, (suffix, _, kind) = imp.find_module(modname, search_path) 660 return imp.find_module(modname, search_path) 661 except ImportError: 662 e = sys.exc_info()[1] 663 LOG.debug('%r: imp.find_module(%r, %r) -> %s', 664 self, modname, [search_path], e) 665 return None 666 667 def find(self, fullname): 668 """ 669 See implementation for a description of how this works. 
class ModuleFinder(object):
    """
    Given the name of a loaded module, make a best-effort attempt at finding
    related modules likely needed by a child context requesting the original
    module.
    """
    def __init__(self):
        #: Import machinery is expensive, keep :py:meth:`get_module_source`
        #: results around.
        self._found_cache = {}

        #: Avoid repeated dependency scanning, which is expensive.
        self._related_cache = {}

    def __repr__(self):
        return 'ModuleFinder()'

    def add_source_override(self, fullname, path, source, is_pkg):
        """
        Explicitly install a source cache entry, preventing usual lookup
        methods from being used.

        Beware the value of `path` is critical when `is_pkg` is specified,
        since it directs where submodules are searched for.

        :param str fullname:
            Name of the module to override.
        :param str path:
            Module's path as it will appear in the cache.
        :param bytes source:
            Module source code as a bytestring.
        :param bool is_pkg:
            :data:`True` if the module is a package.
        """
        self._found_cache[fullname] = (path, source, is_pkg)

    #: Lookup strategies, tried in order by :py:meth:`get_module_source` until
    #: one returns a result.
    get_module_methods = [
        DefectivePython3xMainMethod(),
        PkgutilMethod(),
        SysModulesMethod(),
        ParentEnumerationMethod(),
    ]

    def get_module_source(self, fullname):
        """
        Given the name of a loaded module `fullname`, attempt to find its
        source code.

        :returns:
            Tuple of `(module path, source text, is package?)`. If no method
            can find the source, the tuple is `(None, None, None)`, so callers
            may always unpack three values. Results, including failures, are
            cached.
        """
        tup = self._found_cache.get(fullname)
        if tup:
            return tup

        for method in self.get_module_methods:
            tup = method.find(fullname)
            if tup:
                break
        else:
            # No method produced a result.
            tup = None, None, None
            LOG.debug('get_module_source(%r): cannot find source', fullname)

        self._found_cache[fullname] = tup
        return tup

    def resolve_relpath(self, fullname, level):
        """
        Given an ImportFrom AST node, guess the prefix that should be tacked on
        to an alias name to produce a canonical name. `fullname` is the name of
        the module in which the ImportFrom appears.

        :param int level:
            Import level; 0 denotes an absolute import.
        :returns:
            Prefix ending in ``.``, or the empty string for absolute imports
            and for levels that reach beyond the top-level package.
        """
        mod = sys.modules.get(fullname, None)
        if hasattr(mod, '__path__'):
            # For a package, relative imports are resolved against the
            # package itself, so add a component for the slice below to drop.
            fullname += '.__init__'

        if level == 0 or not fullname:
            return ''

        bits = fullname.split('.')
        if len(bits) <= level:
            # This would be an ImportError in real code.
            return ''

        return '.'.join(bits[:-level]) + '.'

    def generate_parent_names(self, fullname):
        """
        Yield the name of each ancestor package of `fullname`, nearest first.
        """
        while '.' in fullname:
            fullname, _, _ = str_rpartition(to_text(fullname), u'.')
            yield fullname

    def find_related_imports(self, fullname):
        """
        Return a list of non-stdlib modules that are directly imported by
        `fullname`, plus their parents.

        The list is determined by retrieving the source code of
        `fullname`, compiling it, and examining all IMPORT_NAME ops.

        :param fullname: Fully qualified name of an *already imported* module
            for which source code can be retrieved
        :type fullname: str
        :returns:
            Sorted list of Unicode module names, restricted to names present
            in :data:`sys.modules`. Empty if no source could be found.
        """
        related = self._related_cache.get(fullname)
        if related is not None:
            return related

        modpath, src, _ = self.get_module_source(fullname)
        if src is None:
            return []

        maybe_names = list(self.generate_parent_names(fullname))

        co = compile(src, modpath, 'exec')
        for level, modname, namelist in scan_code_imports(co):
            if level == -1:
                # Level -1 may be absolute or relative to the importing
                # module, so consider both spellings.
                modnames = [modname, '%s.%s' % (fullname, modname)]
            else:
                modnames = [
                    '%s%s' % (self.resolve_relpath(fullname, level), modname)
                ]

            maybe_names.extend(modnames)
            # "from x import y" may name a submodule, so consider x.y too.
            maybe_names.extend(
                '%s.%s' % (mname, name)
                for mname in modnames
                for name in namelist
            )

        return self._related_cache.setdefault(fullname, sorted(
            set(
                mitogen.core.to_text(name)
                for name in maybe_names
                if sys.modules.get(name) is not None
                and not is_stdlib_name(name)
                and u'six.moves' not in name  # TODO: crap
            )
        ))

    def find_related(self, fullname):
        """
        Return a list of non-stdlib modules that are imported directly or
        indirectly by `fullname`, plus their parents.

        This method is like :py:meth:`find_related_imports`, but also
        recursively searches any modules which are imported by `fullname`.

        :param fullname: Fully qualified name of an *already imported* module
            for which source code can be retrieved
        :type fullname: str
        :returns:
            Sorted list of module names, never including `fullname` itself.
        """
        pending = [fullname]
        # Every name ever placed on `pending`, so each module is scanned once.
        queued = set(pending)
        found = set()

        while pending:
            # Traversal order is irrelevant as the result is a sorted set, so
            # pop from the end, which is O(1).
            name = pending.pop()
            for related in self.find_related_imports(name):
                found.add(related)
                if related not in queued:
                    queued.add(related)
                    pending.append(related)

        found.discard(fullname)
        return sorted(found)
" 908 "Update %r to prevent unintended execution, using a guard like:\n" 909 "\n" 910 " if __name__ == '__main__':\n" 911 " # your code here.\n" 912 ) 913 914 def whitelist_prefix(self, fullname): 915 if self.whitelist == ['']: 916 self.whitelist = ['mitogen'] 917 self.whitelist.append(fullname) 918 919 def blacklist_prefix(self, fullname): 920 self.blacklist.append(fullname) 921 922 def neutralize_main(self, path, src): 923 """ 924 Given the source for the __main__ module, try to find where it begins 925 conditional execution based on a "if __name__ == '__main__'" guard, and 926 remove any code after that point. 927 """ 928 match = self.MAIN_RE.search(src) 929 if match: 930 return src[:match.start()] 931 932 if b('mitogen.main(') in src: 933 return src 934 935 self._log.error(self.main_guard_msg, path) 936 raise ImportError('refused') 937 938 def _make_negative_response(self, fullname): 939 return (fullname, None, None, None, ()) 940 941 minify_safe_re = re.compile(b(r'\s+#\s*!mitogen:\s*minify_safe')) 942 943 def _build_tuple(self, fullname): 944 if fullname in self._cache: 945 return self._cache[fullname] 946 947 if mitogen.core.is_blacklisted_import(self, fullname): 948 raise ImportError('blacklisted') 949 950 path, source, is_pkg = self._finder.get_module_source(fullname) 951 if path and is_stdlib_path(path): 952 # Prevent loading of 2.x<->3.x stdlib modules! This costs one 953 # RTT per hit, so a client-side solution is also required. 954 self._log.debug('refusing to serve stdlib module %r', fullname) 955 tup = self._make_negative_response(fullname) 956 self._cache[fullname] = tup 957 return tup 958 959 if source is None: 960 # TODO: make this .warning() or similar again once importer has its 961 # own logging category. 
962 self._log.debug('could not find source for %r', fullname) 963 tup = self._make_negative_response(fullname) 964 self._cache[fullname] = tup 965 return tup 966 967 if self.minify_safe_re.search(source): 968 # If the module contains a magic marker, it's safe to minify. 969 t0 = mitogen.core.now() 970 source = mitogen.minify.minimize_source(source).encode('utf-8') 971 self.minify_secs += mitogen.core.now() - t0 972 973 if is_pkg: 974 pkg_present = get_child_modules(path) 975 self._log.debug('%s is a package at %s with submodules %r', 976 fullname, path, pkg_present) 977 else: 978 pkg_present = None 979 980 if fullname == '__main__': 981 source = self.neutralize_main(path, source) 982 compressed = mitogen.core.Blob(zlib.compress(source, 9)) 983 related = [ 984 to_text(name) 985 for name in self._finder.find_related(fullname) 986 if not mitogen.core.is_blacklisted_import(self, name) 987 ] 988 # 0:fullname 1:pkg_present 2:path 3:compressed 4:related 989 tup = ( 990 to_text(fullname), 991 pkg_present, 992 to_text(path), 993 compressed, 994 related 995 ) 996 self._cache[fullname] = tup 997 return tup 998 999 def _send_load_module(self, stream, fullname): 1000 if fullname not in stream.protocol.sent_modules: 1001 tup = self._build_tuple(fullname) 1002 msg = mitogen.core.Message.pickled( 1003 tup, 1004 dst_id=stream.protocol.remote_id, 1005 handle=mitogen.core.LOAD_MODULE, 1006 ) 1007 self._log.debug('sending %s (%.2f KiB) to %s', 1008 fullname, len(msg.data) / 1024.0, stream.name) 1009 self._router._async_route(msg) 1010 stream.protocol.sent_modules.add(fullname) 1011 if tup[2] is not None: 1012 self.good_load_module_count += 1 1013 self.good_load_module_size += len(msg.data) 1014 else: 1015 self.bad_load_module_count += 1 1016 1017 def _send_module_load_failed(self, stream, fullname): 1018 self.bad_load_module_count += 1 1019 stream.protocol.send( 1020 mitogen.core.Message.pickled( 1021 self._make_negative_response(fullname), 1022 dst_id=stream.protocol.remote_id, 1023 
handle=mitogen.core.LOAD_MODULE, 1024 ) 1025 ) 1026 1027 def _send_module_and_related(self, stream, fullname): 1028 if fullname in stream.protocol.sent_modules: 1029 return 1030 1031 try: 1032 tup = self._build_tuple(fullname) 1033 for name in tup[4]: # related 1034 parent, _, _ = str_partition(name, '.') 1035 if parent != fullname and parent not in stream.protocol.sent_modules: 1036 # Parent hasn't been sent, so don't load submodule yet. 1037 continue 1038 1039 self._send_load_module(stream, name) 1040 self._send_load_module(stream, fullname) 1041 except Exception: 1042 LOG.debug('While importing %r', fullname, exc_info=True) 1043 self._send_module_load_failed(stream, fullname) 1044 1045 def _on_get_module(self, msg): 1046 if msg.is_dead: 1047 return 1048 1049 stream = self._router.stream_by_id(msg.src_id) 1050 if stream is None: 1051 return 1052 1053 fullname = msg.data.decode() 1054 self._log.debug('%s requested module %s', stream.name, fullname) 1055 self.get_module_count += 1 1056 if fullname in stream.protocol.sent_modules: 1057 LOG.warning('_on_get_module(): dup request for %r from %r', 1058 fullname, stream) 1059 1060 t0 = mitogen.core.now() 1061 try: 1062 self._send_module_and_related(stream, fullname) 1063 finally: 1064 self.get_module_secs += mitogen.core.now() - t0 1065 1066 def _send_forward_module(self, stream, context, fullname): 1067 if stream.protocol.remote_id != context.context_id: 1068 stream.protocol._send( 1069 mitogen.core.Message( 1070 data=b('%s\x00%s' % (context.context_id, fullname)), 1071 handle=mitogen.core.FORWARD_MODULE, 1072 dst_id=stream.protocol.remote_id, 1073 ) 1074 ) 1075 1076 def _forward_one_module(self, context, fullname): 1077 forwarded = self._forwarded_by_context.get(context) 1078 if forwarded is None: 1079 forwarded = set() 1080 self._forwarded_by_context[context] = forwarded 1081 1082 if fullname in forwarded: 1083 return 1084 1085 path = [] 1086 while fullname: 1087 path.append(fullname) 1088 fullname, _, _ = 
str_rpartition(fullname, u'.') 1089 1090 stream = self._router.stream_by_id(context.context_id) 1091 if stream is None: 1092 LOG.debug('%r: dropping forward of %s to no longer existent ' 1093 '%r', self, path[0], context) 1094 return 1095 1096 for fullname in reversed(path): 1097 self._send_module_and_related(stream, fullname) 1098 self._send_forward_module(stream, context, fullname) 1099 1100 def _forward_modules(self, context, fullnames): 1101 IOLOG.debug('%r._forward_modules(%r, %r)', self, context, fullnames) 1102 for fullname in fullnames: 1103 self._forward_one_module(context, mitogen.core.to_text(fullname)) 1104 1105 def forward_modules(self, context, fullnames): 1106 self._router.broker.defer(self._forward_modules, context, fullnames) 1107 1108 1109class Broker(mitogen.core.Broker): 1110 """ 1111 .. note:: 1112 1113 You may construct as many brokers as desired, and use the same broker 1114 for multiple routers, however usually only one broker need exist. 1115 Multiple brokers may be useful when dealing with sets of children with 1116 differing lifetimes. For example, a subscription service where 1117 non-payment results in termination for one customer. 1118 1119 :param bool install_watcher: 1120 If :data:`True`, an additional thread is started to monitor the 1121 lifetime of the main thread, triggering :meth:`shutdown` 1122 automatically in case the user forgets to call it, or their code 1123 crashed. 1124 1125 You should not rely on this functionality in your program, it is only 1126 intended as a fail-safe and to simplify the API for new users. In 1127 particular, alternative Python implementations may not be able to 1128 support watching the main thread. 
1129 """ 1130 shutdown_timeout = 5.0 1131 _watcher = None 1132 poller_class = mitogen.parent.PREFERRED_POLLER 1133 1134 def __init__(self, install_watcher=True): 1135 if install_watcher: 1136 self._watcher = ThreadWatcher.watch( 1137 target=threading.currentThread(), 1138 on_join=self.shutdown, 1139 ) 1140 super(Broker, self).__init__() 1141 self.timers = mitogen.parent.TimerList() 1142 1143 def shutdown(self): 1144 super(Broker, self).shutdown() 1145 if self._watcher: 1146 self._watcher.remove() 1147 1148 1149class Router(mitogen.parent.Router): 1150 """ 1151 Extend :class:`mitogen.core.Router` with functionality useful to masters, 1152 and child contexts who later become masters. Currently when this class is 1153 required, the target context's router is upgraded at runtime. 1154 1155 .. note:: 1156 1157 You may construct as many routers as desired, and use the same broker 1158 for multiple routers, however usually only one broker and router need 1159 exist. Multiple routers may be useful when dealing with separate trust 1160 domains, for example, manipulating infrastructure belonging to separate 1161 customers or projects. 1162 1163 :param mitogen.master.Broker broker: 1164 Broker to use. If not specified, a private :class:`Broker` is created. 1165 1166 :param int max_message_size: 1167 Override the maximum message size this router is willing to receive or 1168 transmit. Any value set here is automatically inherited by any children 1169 created by the router. 1170 1171 This has a liberal default of 128 MiB, but may be set much lower. 1172 Beware that setting it below 64KiB may encourage unexpected failures as 1173 parents and children can no longer route large Python modules that may 1174 be required by your application. 
1175 """ 1176 1177 broker_class = Broker 1178 1179 #: When :data:`True`, cause the broker thread and any subsequent broker and 1180 #: main threads existing in any child to write 1181 #: ``/tmp/mitogen.stats.<pid>.<thread_name>.log`` containing a 1182 #: :mod:`cProfile` dump on graceful exit. Must be set prior to construction 1183 #: of any :class:`Broker`, e.g. via:: 1184 #: 1185 #: mitogen.master.Router.profiling = True 1186 profiling = os.environ.get('MITOGEN_PROFILING') is not None 1187 1188 def __init__(self, broker=None, max_message_size=None): 1189 if broker is None: 1190 broker = self.broker_class() 1191 if max_message_size: 1192 self.max_message_size = max_message_size 1193 super(Router, self).__init__(broker) 1194 self.upgrade() 1195 1196 def upgrade(self): 1197 self.id_allocator = IdAllocator(self) 1198 self.responder = ModuleResponder(self) 1199 self.log_forwarder = LogForwarder(self) 1200 self.route_monitor = mitogen.parent.RouteMonitor(router=self) 1201 self.add_handler( # TODO: cutpaste. 1202 fn=self._on_detaching, 1203 handle=mitogen.core.DETACHING, 1204 persist=True, 1205 ) 1206 1207 def _on_broker_exit(self): 1208 super(Router, self)._on_broker_exit() 1209 dct = self.get_stats() 1210 dct['self'] = self 1211 dct['minify_ms'] = 1000 * dct['minify_secs'] 1212 dct['get_module_ms'] = 1000 * dct['get_module_secs'] 1213 dct['good_load_module_size_kb'] = dct['good_load_module_size'] / 1024.0 1214 dct['good_load_module_size_avg'] = ( 1215 ( 1216 dct['good_load_module_size'] / 1217 (float(dct['good_load_module_count']) or 1.0) 1218 ) / 1024.0 1219 ) 1220 1221 LOG.debug( 1222 '%(self)r: stats: ' 1223 '%(get_module_count)d module requests in ' 1224 '%(get_module_ms)d ms, ' 1225 '%(good_load_module_count)d sent ' 1226 '(%(minify_ms)d ms minify time), ' 1227 '%(bad_load_module_count)d negative responses. ' 1228 'Sent %(good_load_module_size_kb).01f kb total, ' 1229 '%(good_load_module_size_avg).01f kb avg.' 
1230 % dct 1231 ) 1232 1233 def get_stats(self): 1234 """ 1235 Return performance data for the module responder. 1236 1237 :returns: 1238 1239 Dict containing keys: 1240 1241 * `get_module_count`: Integer count of 1242 :data:`mitogen.core.GET_MODULE` messages received. 1243 * `get_module_secs`: Floating point total seconds spent servicing 1244 :data:`mitogen.core.GET_MODULE` requests. 1245 * `good_load_module_count`: Integer count of successful 1246 :data:`mitogen.core.LOAD_MODULE` messages sent. 1247 * `good_load_module_size`: Integer total bytes sent in 1248 :data:`mitogen.core.LOAD_MODULE` message payloads. 1249 * `bad_load_module_count`: Integer count of negative 1250 :data:`mitogen.core.LOAD_MODULE` messages sent. 1251 * `minify_secs`: CPU seconds spent minifying modules marked 1252 minify-safe. 1253 """ 1254 return { 1255 'get_module_count': self.responder.get_module_count, 1256 'get_module_secs': self.responder.get_module_secs, 1257 'good_load_module_count': self.responder.good_load_module_count, 1258 'good_load_module_size': self.responder.good_load_module_size, 1259 'bad_load_module_count': self.responder.bad_load_module_count, 1260 'minify_secs': self.responder.minify_secs, 1261 } 1262 1263 def enable_debug(self): 1264 """ 1265 Cause this context and any descendant child contexts to write debug 1266 logs to ``/tmp/mitogen.<pid>.log``. 
1267 """ 1268 mitogen.core.enable_debug_logging() 1269 self.debug = True 1270 1271 def __enter__(self): 1272 return self 1273 1274 def __exit__(self, e_type, e_val, tb): 1275 self.broker.shutdown() 1276 self.broker.join() 1277 1278 def disconnect_stream(self, stream): 1279 self.broker.defer(stream.on_disconnect, self.broker) 1280 1281 def disconnect_all(self): 1282 for stream in self._stream_by_id.values(): 1283 self.disconnect_stream(stream) 1284 1285 1286class IdAllocator(object): 1287 """ 1288 Allocate IDs for new contexts constructed locally, and blocks of IDs for 1289 children to allocate their own IDs using 1290 :class:`mitogen.parent.ChildIdAllocator` without risk of conflict, and 1291 without necessitating network round-trips for each new context. 1292 1293 This class responds to :data:`mitogen.core.ALLOCATE_ID` messages received 1294 from children by replying with fresh block ID allocations. 1295 1296 The master's :class:`IdAllocator` instance can be accessed via 1297 :attr:`mitogen.master.Router.id_allocator`. 1298 """ 1299 #: Block allocations are made in groups of 1000 by default. 1300 BLOCK_SIZE = 1000 1301 1302 def __init__(self, router): 1303 self.router = router 1304 self.next_id = 1 1305 self.lock = threading.Lock() 1306 router.add_handler( 1307 fn=self.on_allocate_id, 1308 handle=mitogen.core.ALLOCATE_ID, 1309 ) 1310 1311 def __repr__(self): 1312 return 'IdAllocator(%r)' % (self.router,) 1313 1314 def allocate(self): 1315 """ 1316 Allocate a context ID by directly incrementing an internal counter. 1317 1318 :returns: 1319 The new context ID. 1320 """ 1321 self.lock.acquire() 1322 try: 1323 id_ = self.next_id 1324 self.next_id += 1 1325 return id_ 1326 finally: 1327 self.lock.release() 1328 1329 def allocate_block(self): 1330 """ 1331 Allocate a block of IDs for use in a child context. 1332 1333 This function is safe to call from any thread. 
1334 1335 :returns: 1336 Tuple of the form `(id, end_id)` where `id` is the first usable ID 1337 and `end_id` is the last usable ID. 1338 """ 1339 self.lock.acquire() 1340 try: 1341 id_ = self.next_id 1342 self.next_id += self.BLOCK_SIZE 1343 end_id = id_ + self.BLOCK_SIZE 1344 LOG.debug('%r: allocating [%d..%d)', self, id_, end_id) 1345 return id_, end_id 1346 finally: 1347 self.lock.release() 1348 1349 def on_allocate_id(self, msg): 1350 if msg.is_dead: 1351 return 1352 1353 id_, last_id = self.allocate_block() 1354 requestee = self.router.context_by_id(msg.src_id) 1355 LOG.debug('%r: allocating [%r..%r) to %r', 1356 self, id_, last_id, requestee) 1357 msg.reply((id_, last_id)) 1358