1# Copyright 2019, David Wilson
2#
3# Redistribution and use in source and binary forms, with or without
4# modification, are permitted provided that the following conditions are met:
5#
6# 1. Redistributions of source code must retain the above copyright notice,
7# this list of conditions and the following disclaimer.
8#
9# 2. Redistributions in binary form must reproduce the above copyright notice,
10# this list of conditions and the following disclaimer in the documentation
11# and/or other materials provided with the distribution.
12#
13# 3. Neither the name of the copyright holder nor the names of its contributors
14# may be used to endorse or promote products derived from this software without
15# specific prior written permission.
16#
17# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
21# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27# POSSIBILITY OF SUCH DAMAGE.
28
29# !mitogen: minify_safe
30
31"""
32This module implements functionality required by master processes, such as
33starting new contexts via SSH. Its size is also restricted, since it must
34be sent to any context that will be used to establish additional child
35contexts.
36"""
37
38import dis
39import errno
40import imp
41import inspect
42import itertools
43import logging
44import os
45import pkgutil
46import re
47import string
48import sys
49import threading
50import types
51import zlib
52
53try:
54    import sysconfig
55except ImportError:
56    sysconfig = None
57
58if not hasattr(pkgutil, 'find_loader'):
59    # find_loader() was new in >=2.5, but the modern pkgutil.py syntax has
60    # been kept intentionally 2.3 compatible so we can reuse it.
61    from mitogen.compat import pkgutil
62
63import mitogen
64import mitogen.core
65import mitogen.minify
66import mitogen.parent
67
68from mitogen.core import b
69from mitogen.core import IOLOG
70from mitogen.core import LOG
71from mitogen.core import str_partition
72from mitogen.core import str_rpartition
73from mitogen.core import to_text
74
# Lazy map()/zip(): use itertools.imap/izip where the interpreter provides
# them, otherwise fall back to the map/zip builtins.
imap = getattr(itertools, 'imap', map)
izip = getattr(itertools, 'izip', zip)

# Interpreters lacking an any() builtin get the mitogen.core implementation.
try:
    any
except NameError:
    from mitogen.core import any

# Likewise for the next() builtin.
try:
    next
except NameError:
    from mitogen.core import next


# Logger named 'mitogen.ctx'.
RLOG = logging.getLogger('mitogen.ctx')
90
91
def _stdlib_paths():
    """
    Return a set of paths from which Python imports the standard library.

    :return:
        Set of path strings. One ``<prefix>/lib/pythonX.Y`` entry is built for
        each of the ``sys`` prefix attributes that is present, plus the
        ``DESTLIB`` configuration variable when :mod:`sysconfig` is available
        and the variable is set.
    """
    attr_candidates = [
        'prefix',
        'real_prefix',  # virtualenv: only set inside a virtual environment.
        'base_prefix',  # venv: always set, equal to prefix if outside.
    ]
    prefixes = (getattr(sys, a, None) for a in attr_candidates)
    version = 'python%s.%s' % sys.version_info[0:2]
    s = set(os.path.abspath(os.path.join(p, 'lib', version))
            for p in prefixes if p is not None)

    # When running 'unit2 tests/module_finder_test.py' in a Py2 venv on Ubuntu
    # 18.10, above is insufficient to catch the real directory.
    if sysconfig is not None:
        # get_config_var() returns None for variables the platform does not
        # define. Never let that (or an empty string) into the set, since
        # every entry is later handed to os.path.commonprefix().
        destlib = sysconfig.get_config_var('DESTLIB')
        if destlib:
            s.add(destlib)
    return s
111
112
def is_stdlib_name(modname):
    """
    Return :data:`True` if `modname` appears to come from the standard library.

    Built-in modules always count as standard library. Any other module must
    already be present in :data:`sys.modules`, and is classified by passing
    the absolute form of its ``__file__`` to :func:`is_stdlib_path`.

    :param str modname:
        Canonical module name, as used for :data:`sys.modules` keys.
    """
    if imp.is_builtin(modname) != 0:
        return True

    module = sys.modules.get(modname)
    if module is None:
        return False

    # six installs crap with no __file__, and some modules (e.g. namespace
    # packages on newer Python 3) carry __file__ = None. Treat both as the
    # empty string, since os.path.abspath(None) raises TypeError.
    modpath = os.path.abspath(getattr(module, '__file__', None) or '')
    return is_stdlib_path(modpath)
127
128
# Standard library directories, computed once at import time and consulted by
# is_stdlib_path().
_STDLIB_PATHS = _stdlib_paths()
130
131
def is_stdlib_path(path):
    """
    Return :data:`True` if `path` begins with one of the directories listed in
    :data:`_STDLIB_PATHS` and does not mention ``site-packages`` or
    ``dist-packages``.

    :param str path:
        Filesystem path of a module.
    """
    # Third party install directories are commonly nested beneath the stdlib
    # directory. This check does not depend on the candidate directory, so do
    # it once up front.
    if 'site-packages' in path or 'dist-packages' in path:
        return False

    return any(
        os.path.commonprefix((libpath, path)) == libpath
        for libpath in _STDLIB_PATHS
        # Ignore None/empty entries: None makes commonprefix() raise, and the
        # empty string is a prefix of every path.
        if libpath
    )
139
140
def get_child_modules(path):
    """
    Return the suffixes of submodules directly nested beneath the package
    directory containing `path`.

    :param str path:
        Path to the module's source code on disk, or some PEP-302-recognized
        equivalent. Usually this is the module's ``__file__`` attribute, but
        it is passed explicitly so the module need not be loaded.

    :return:
        List of submodule name suffixes, converted with ``to_text()``.
    """
    package_dir = os.path.dirname(path)
    return [
        to_text(child_name)
        for _importer, child_name, _ispkg in pkgutil.iter_modules([package_dir])
    ]
156
157
def _looks_like_script(path):
    """
    Return :data:`True` if the (possibly extensionless) file at `path`
    resembles a Python script.

    The test is deliberately crude: the first 512 bytes must consist solely of
    characters found in :data:`string.printable`. A directory yields
    :data:`False`; any other error opening the file is propagated.
    """
    try:
        fp = open(path, 'rb')
    except IOError:
        err = sys.exc_info()[1]
        if err.args[0] != errno.EISDIR:
            raise
        return False

    try:
        # latin-1 maps every byte onto exactly one character, so decoding
        # cannot fail regardless of the file's content.
        head = fp.read(512).decode('latin-1')
    finally:
        fp.close()

    return set(head).issubset(string.printable)
177
178
def _py_filename(path):
    """
    Translate a module file path into the path of its Python source.

    :param path:
        Path as found in e.g. a module's ``__file__``; may be empty or
        :data:`None`.
    :return:
        `path` with a trailing ``.pyc``/``.pyo`` rewritten to ``.py``, when
        the result ends in ``.py`` or names an existing file that
        :func:`_looks_like_script` accepts. Otherwise :data:`None`.
    """
    if not path:
        return None

    suffix = path[-4:]
    if suffix == '.pyc' or suffix == '.pyo':
        # Drop the final 'c' or 'o' to obtain the source file name.
        path = path[:-1]

    if path.endswith('.py') or (os.path.exists(path)
                                and _looks_like_script(path)):
        return path

    return None
191
192
def _get_core_source():
    """
    Master version of parent.get_core_source(): fetch the source code of
    :mod:`mitogen.core` using :func:`inspect.getsource` and return the result
    of passing it through :func:`mitogen.minify.minimize_source`.
    """
    return mitogen.minify.minimize_source(inspect.getsource(mitogen.core))
199
200
# Only when mitogen.is_master is set: replace mitogen.parent's
# _get_core_source with the implementation defined in this module.
if mitogen.is_master:
    # TODO: find a less surprising way of installing this.
    mitogen.parent._get_core_source = _get_core_source
204
205
# Numeric opcodes of the two instructions scan_code_imports() searches for,
# found by their position in dis.opname.
LOAD_CONST = dis.opname.index('LOAD_CONST')
IMPORT_NAME = dis.opname.index('IMPORT_NAME')
208
209
def _getarg(nextb, c):
    """
    Read the operand belonging to opcode `c`, if it has one.

    :param nextb:
        Callable returning the next byte of the bytecode as an integer. It is
        only invoked when `c` takes an argument.
    :param int c:
        Opcode just read from the bytecode.
    :return:
        The 16-bit operand, assembled from two bytes with the low byte first,
        or :data:`None` when `c` is below :data:`dis.HAVE_ARGUMENT`.
    """
    if c < dis.HAVE_ARGUMENT:
        return None

    low = nextb()
    high = nextb()
    return low | (high << 8)
213
214
# iter_opcodes(co) yields `(op, oparg)` tuples for the bytecode of the code
# object `co`. Three variants exist because the bytecode encoding and the
# element type of `co.co_code` differ between interpreter versions.
if sys.version_info < (3, 0):
    def iter_opcodes(co):
        # Yield `(op, oparg)` tuples from the code object `co`.
        # co_code is a byte string here, so each character is converted to an
        # integer. `oparg` is None for opcodes without an argument.
        # NOTE: EXTENDED_ARG is yielded like any other opcode and is not
        # folded into the following instruction.
        ordit = imap(ord, co.co_code)
        nextb = ordit.next
        return ((c, _getarg(nextb, c)) for c in ordit)
elif sys.version_info < (3, 6):
    def iter_opcodes(co):
        # Yield `(op, oparg)` tuples from the code object `co`.
        # co_code is a bytes object here, so iterating it already produces
        # integers. `oparg` is None for opcodes without an argument.
        # NOTE: EXTENDED_ARG is yielded like any other opcode and is not
        # folded into the following instruction.
        ordit = iter(co.co_code)
        nextb = ordit.__next__
        return ((c, _getarg(nextb, c)) for c in ordit)
else:
    def iter_opcodes(co):
        # Yield `(op, oparg)` tuples from the code object `co`.
        #
        # Every instruction is an (opcode, 1-byte operand) pair:
        # https://github.com/abarnert/cpython/blob/c095a32f/Python/wordcode.md
        #
        # Operands above 255 are encoded using one or more EXTENDED_ARG
        # prefix instructions carrying the high-order bytes. They are folded
        # into the operand of the instruction they prefix rather than being
        # yielded themselves, otherwise any index above 255 into co_consts or
        # co_names would be silently truncated to its low byte.
        ordit = iter(co.co_code)
        nextb = ordit.__next__
        ext = 0
        for c in ordit:
            arg = nextb() | ext
            if c == dis.EXTENDED_ARG:
                ext = arg << 8
            else:
                ext = 0
                yield c, arg
234
235
def scan_code_imports(co):
    """
    Given a code object `co`, scan its bytecode yielding any ``IMPORT_NAME``
    and associated prior ``LOAD_CONST`` instructions representing an `Import`
    statement or `ImportFrom` statement.

    :return:
        Generator producing `(level, modname, namelist)` tuples, where:

        * `level`: -1 for normal import, 0 for absolute import, and >0 for
          relative import.
        * `modname`: Name of module to import, or from where `namelist` names
          are imported.
        * `namelist`: for `ImportFrom`, the list of names to be imported from
          `modname`.
    """
    opit = iter_opcodes(co)

    # At least two instructions are required before anything can match.
    try:
        older = next(opit)
        newer = next(opit)
    except StopIteration:
        return

    if sys.version_info >= (2, 5):
        # Slide a window of three consecutive instructions over the bytecode,
        # looking for LOAD_CONST (level), LOAD_CONST (namelist), IMPORT_NAME.
        for current in opit:
            if (current[0] == IMPORT_NAME
                    and older[0] == LOAD_CONST
                    and newer[0] == LOAD_CONST):
                yield (co.co_consts[older[1]],
                       co.co_names[current[1]],
                       co.co_consts[newer[1]] or ())
            older, newer = newer, current
    else:
        # Python 2.4 did not yet have 'level', so stack format differs: only a
        # single LOAD_CONST (namelist) precedes IMPORT_NAME.
        for current in itertools.chain([newer], opit):
            if current[0] == IMPORT_NAME and older[0] == LOAD_CONST:
                yield (-1,
                       co.co_names[current[1]],
                       co.co_consts[older[1]] or ())
            older = current
278
279
class ThreadWatcher(object):
    """
    Manage threads that wait for another thread to shut down, before invoking
    `on_join()` for each associated ThreadWatcher.

    In CPython it seems possible to use this method to ensure a non-main thread
    is signalled when the main thread has exited, using a third thread as a
    proxy.

    :param threading.Thread target:
        Thread whose termination is awaited.
    :param on_join:
        Callable invoked without arguments once `target` has been joined.
    """
    #: Protects remaining _cls_* members.
    _cls_lock = threading.Lock()

    #: PID of the process that last modified the class data. If the PID
    #: changes, it means the thread watch dict refers to threads that no longer
    #: exist in the current process (since it forked), and so must be reset.
    _cls_pid = None

    #: Map watched Thread -> list of ThreadWatcher instances.
    _cls_instances_by_target = {}

    #: Map watched Thread -> watcher Thread for each watched thread.
    _cls_thread_by_target = {}

    def __init__(self, target, on_join):
        self.target = target
        self.on_join = on_join

    @classmethod
    def watch(cls, target, on_join):
        """
        Construct a watcher for `target`, install it, and return it.
        """
        watcher = cls(target, on_join)
        watcher.install()
        return watcher

    @classmethod
    def _reset(cls):
        """
        Discard the watch dictionaries if they were populated by a different
        process, i.e. we have forked since they were last touched.
        """
        pid = os.getpid()
        if pid == cls._cls_pid:
            return

        cls._cls_pid = pid
        cls._cls_instances_by_target.clear()
        cls._cls_thread_by_target.clear()

    @classmethod
    def _watch(cls, target):
        """
        Body of the watcher thread: block until `target` exits, then invoke
        `on_join()` for every watcher registered against it.
        """
        target.join()
        watchers = cls._cls_instances_by_target[target]
        for watcher in watchers:
            watcher.on_join()

    def install(self):
        """
        Register this watcher, starting a watcher thread for the target if
        none has been started yet.
        """
        target = self.target
        self._cls_lock.acquire()
        try:
            self._reset()
            self._cls_instances_by_target.setdefault(target, []).append(self)
            if target in self._cls_thread_by_target:
                return

            thread = threading.Thread(
                name='mitogen.master.join_thread_async',
                target=self._watch,
                args=(target,)
            )
            self._cls_thread_by_target[target] = thread
            thread.start()
        finally:
            self._cls_lock.release()

    def remove(self):
        """
        Unregister this watcher, so its `on_join()` is no longer invoked. Does
        nothing if the watcher is not currently registered.
        """
        self._cls_lock.acquire()
        try:
            self._reset()
            watchers = self._cls_instances_by_target.get(self.target)
            if watchers and self in watchers:
                watchers.remove(self)
        finally:
            self._cls_lock.release()
355
356
class LogForwarder(object):
    """
    Install a :data:`mitogen.core.FORWARD_LOG` handler that delivers forwarded
    log events into the local logging framework. This is used by the master's
    :class:`Router`.

    Each forwarded event is rebuilt as a :class:`logging.LogRecord` and handed
    to the logger named ``<name>.[<context name>]``, where ``<name>`` is the
    logger name sent by the child and ``<context name>`` is the ``name``
    attribute of the source context. The records include some extra
    attributes:

    * ``mitogen_message``: Unicode original message.
    * ``mitogen_context``: reference to the source context, as returned by the
      router's ``context_by_id()``.
    * ``mitogen_name``: Original logger name.

    :param mitogen.master.Router router:
        Router to install the handler on.
    """
    def __init__(self, router):
        self._router = router
        #: Map logger name -> logging.Logger, to avoid repeated lookups.
        self._cache = {}
        router.add_handler(
            fn=self._on_forward_log,
            handle=mitogen.core.FORWARD_LOG,
        )

    def _on_forward_log(self, msg):
        """
        Handle one forwarded log message.

        Dead messages, and messages from a context the router does not know,
        are dropped. The message payload is expected to be
        ``name NUL level NUL text``, encoded as UTF-8.
        """
        if msg.is_dead:
            return

        context = self._router.context_by_id(msg.src_id)
        if context is None:
            LOG.error('%s: dropping log from unknown context %d',
                      self, msg.src_id)
            return

        name, level_s, s = msg.data.decode('utf-8', 'replace').split('\x00', 2)

        logger_name = '%s.[%s]' % (name, context.name)
        logger = self._cache.get(logger_name)
        if logger is None:
            self._cache[logger_name] = logger = logging.getLogger(logger_name)

        # See logging.Logger.makeRecord()
        record = logging.LogRecord(
            name=logger.name,
            level=int(level_s),
            pathname='(unknown file)',
            lineno=0,
            msg=s,
            args=(),
            exc_info=None,
        )
        record.mitogen_message = s
        # Reuse the context resolved above rather than asking the router a
        # second time, so the record always refers to the same context that
        # was used to pick the logger.
        record.mitogen_context = context
        record.mitogen_name = name
        logger.handle(record)

    def __repr__(self):
        return 'LogForwarder(%r)' % (self._router,)
419
420
class FinderMethod(object):
    """
    Interface to a method for locating a Python module or package given its
    name according to the running Python interpreter. You'd think this was a
    simple task, right? Naive young fellow, welcome to the real world.

    Subclasses override :meth:`find`.
    """
    def __repr__(self):
        return self.__class__.__name__ + '()'

    def find(self, fullname):
        """
        Locate the source code of the module named `fullname`.

        :param str fullname:
            Canonical module name as would be found in :data:`sys.modules`.

        :returns:
            :data:`None` if not found, or a `(path, source, is_pkg)` tuple,
            where:

            * `path`: Unicode string containing path to source file.
            * `source`: Bytestring containing source file's content.
            * `is_pkg`: :data:`True` if `fullname` is a package.
        """
        raise NotImplementedError()
443
444
class DefectivePython3xMainMethod(FinderMethod):
    """
    Recent versions of Python 3.x introduced an incomplete notion of
    importer specs, and in doing so created permanent asymmetry in the
    :mod:`pkgutil` interface handling for the :mod:`__main__` module. Therefore
    we must handle :mod:`__main__` specially.
    """
    def find(self, fullname):
        """
        Find :mod:`__main__` using its :data:`__file__` attribute.

        Any `fullname` other than ``__main__`` is ignored. The file named by
        ``__file__`` must exist and pass :func:`_looks_like_script`, otherwise
        :data:`None` is returned. The result is never a package.
        """
        if fullname != '__main__':
            return None

        mod = sys.modules.get(fullname)
        if not mod:
            return None

        path = getattr(mod, '__file__', None)
        if path is None:
            return None
        if not os.path.exists(path):
            return None
        if not _looks_like_script(path):
            return None

        fp = open(path, 'rb')
        try:
            return path, fp.read(), False
        finally:
            fp.close()
474
475
class PkgutilMethod(FinderMethod):
    """
    Attempt to fetch source code via pkgutil. In an ideal world, this would
    be the only required implementation of get_module().
    """
    def find(self, fullname):
        """
        Find `fullname` using :func:`pkgutil.find_loader`.

        :returns:
            `(path, source, is_pkg)` tuple with `source` as a bytestring, or
            :data:`None` if no loader exists, the loader lacks or fails one of
            the required methods, or no path or source could be determined.
        """
        try:
            # Pre-'import spec' this returned None, in Python3.6 it raises
            # ImportError.
            loader = pkgutil.find_loader(fullname)
        except ImportError:
            LOG.debug('%r._get_module_via_pkgutil(%r): %s',
                      self, fullname, sys.exc_info()[1])
            return None

        IOLOG.debug('%r._get_module_via_pkgutil(%r) -> %r',
                    self, fullname, loader)
        if not loader:
            return None

        try:
            path = _py_filename(loader.get_filename(fullname))
            source = loader.get_source(fullname)
            is_pkg = loader.is_package(fullname)
        except (AttributeError, ImportError):
            # - Per PEP-302, get_source() and is_package() are optional,
            #   calling them may throw AttributeError.
            # - get_filename() may throw ImportError if pkgutil.find_loader()
            #   picks a "parent" package's loader for some crap that's been
            #   stuffed in sys.modules, for example in the case of urllib3:
            #       "loader for urllib3.contrib.pyopenssl cannot handle
            #        requests.packages.urllib3.contrib.pyopenssl"
            LOG.debug('%r: loading %r using %r failed: %s',
                      self, fullname, loader, sys.exc_info()[1])
            return None

        if path is None:
            return None
        if source is None:
            return None

        # get_source() returns "string" according to PEP-302, which was
        # reinterpreted for Python 3 to mean a Unicode string.
        if isinstance(source, mitogen.core.UnicodeType):
            source = source.encode('utf-8')

        return path, source, is_pkg
526
527
class SysModulesMethod(FinderMethod):
    """
    Attempt to fetch source code via :data:`sys.modules`. This was originally
    specifically to support :mod:`__main__`, but it may catch a few more cases.
    """
    def find(self, fullname):
        """
        Find `fullname` using its :data:`__file__` attribute.

        :returns:
            `(path, source, is_pkg)` tuple with `source` as a bytestring, or
            :data:`None` if :data:`sys.modules` holds no regular module of
            that name, the module's ``__name__`` disagrees with `fullname`,
            or no source path can be derived from its ``__file__``.
        """
        module = sys.modules.get(fullname)
        if not isinstance(module, types.ModuleType):
            LOG.debug('%r: sys.modules[%r] absent or not a regular module',
                      self, fullname)
            return None

        LOG.debug('_get_module_via_sys_modules(%r) -> %r', fullname, module)
        alleged_name = getattr(module, '__name__', None)
        if alleged_name != fullname:
            LOG.debug('sys.modules[%r].__name__ is incorrect, assuming '
                      'this is a hacky module alias and ignoring it. '
                      'Got %r, module object: %r',
                      fullname, alleged_name, module)
            return None

        path = _py_filename(getattr(module, '__file__', ''))
        if not path:
            return None

        LOG.debug('%r: sys.modules[%r]: found %s', self, fullname, path)
        is_pkg = hasattr(module, '__path__')
        try:
            source = inspect.getsource(module)
        except IOError:
            # Work around inspect.getsourcelines() bug for 0-byte __init__.py
            # files: only packages get the substitute source.
            if is_pkg:
                source = '\n'
            else:
                raise

        # inspect.getsource() may hand back a Unicode string; callers expect
        # a bytestring.
        if isinstance(source, mitogen.core.UnicodeType):
            source = source.encode('utf-8')

        return path, source, is_pkg
573
574
class ParentEnumerationMethod(FinderMethod):
    """
    Attempt to fetch source code by examining the module's (hopefully less
    insane) parent package, and if no insane parents exist, simply use
    :mod:`sys.path` to search for it from scratch on the filesystem using the
    normal Python lookup mechanism.

    This is required for older versions of :mod:`ansible.compat.six`,
    :mod:`plumbum.colors`, Ansible 2.8 :mod:`ansible.module_utils.distro` and
    its submodule :mod:`ansible.module_utils.distro._distro`.

    When some package dynamically replaces itself in :data:`sys.modules`, but
    only conditionally according to some program logic, it is possible that
    children may attempt to load modules and subpackages from it that can no
    longer be resolved by examining a (corrupted) parent.

    For cases like :mod:`ansible.module_utils.distro`, this must handle cases
    where a package transmuted itself into a totally unrelated module during
    import and vice versa, where :data:`sys.modules` is replaced with junk that
    makes it impossible to discover the loaded module using the in-memory
    module object or any parent package's :data:`__path__`, since they have all
    been overwritten. Some men just want to watch the world burn.
    """
    def _find_sane_parent(self, fullname):
        """
        Iteratively search :data:`sys.modules` for the least indirect parent of
        `fullname` that is loaded and contains a :data:`__path__` attribute.

        :return:
            `(pkg_parts, search_path, modpath)` tuple, where:

                * `pkg_parts`: list of the dotted name components of the found
                   package, or the empty list if none is found.
                * `search_path`: :data:`__path__` attribute of the least
                   indirect parent found, or :data:`None` if no indirect parent
                   was found.
                * `modpath`: list of module name components leading from
                   `search_path` to the target module.
        """
        path = None
        modpath = []
        while True:
            # Peel the rightmost component off the name on every iteration,
            # remembering it so the caller can descend back down again.
            pkgname, _, modname = str_rpartition(to_text(fullname), u'.')
            modpath.insert(0, modname)
            if not pkgname:
                # Ran out of parents: nothing usable was found.
                return [], None, modpath

            pkg = sys.modules.get(pkgname)
            path = getattr(pkg, '__path__', None)
            if pkg and path:
                return pkgname.split('.'), path, modpath

            LOG.debug('%r: %r lacks __path__ attribute', self, pkgname)
            fullname = pkgname

    def _found_package(self, fullname, path):
        """
        Return the result tuple for the package directory `path`, reading its
        ``__init__.py``. The file is opened here and closed by
        :meth:`_found_module`.
        """
        path = os.path.join(path, '__init__.py')
        LOG.debug('%r: %r is PKG_DIRECTORY: %r', self, fullname, path)
        return self._found_module(
            fullname=fullname,
            path=path,
            fp=open(path, 'rb'),
            is_pkg=True,
        )

    def _found_module(self, fullname, path, fp, is_pkg=False):
        """
        Return a `(path, source, is_pkg)` tuple for the module at `path`, with
        the source read from the open file `fp`, or :data:`None` if
        :func:`_py_filename` rejects `path`. `fp` is closed in either case,
        provided it is not false.
        """
        try:
            path = _py_filename(path)
            if not path:
                return

            source = fp.read()
        finally:
            if fp:
                fp.close()

        if isinstance(source, mitogen.core.UnicodeType):
            # get_source() returns "string" according to PEP-302, which was
            # reinterpreted for Python 3 to mean a Unicode string.
            source = source.encode('utf-8')
        return path, source, is_pkg

    def _find_one_component(self, modname, search_path):
        """
        Wrap :func:`imp.find_module`, returning its result tuple, or
        :data:`None` (after logging) if it raises :class:`ImportError`.
        """
        try:
            #fp, path, (suffix, _, kind) = imp.find_module(modname, search_path)
            return imp.find_module(modname, search_path)
        except ImportError:
            e = sys.exc_info()[1]
            LOG.debug('%r: imp.find_module(%r, %r) -> %s',
                      self, modname, [search_path], e)
            return None

    def find(self, fullname):
        """
        Find `fullname` by walking down from its nearest sane parent.

        :meth:`_find_sane_parent` supplies the initial search path and the
        list of name components still to be resolved. Each component is then
        located in turn using :meth:`_find_one_component`; every component
        except the last must be a package directory, which becomes the search
        path for the next component.

        :returns:
            `(path, source, is_pkg)` tuple, or :data:`None` if any component
            cannot be found or the source path is unusable.
        """
        #if fullname not in sys.modules:
            # Don't attempt this unless a module really exists in sys.modules,
            # else we could return junk.
            #return

        fullname = to_text(fullname)
        modname, search_path, modpath = self._find_sane_parent(fullname)
        while True:
            tup = self._find_one_component(modpath.pop(0), search_path)
            if tup is None:
                return None

            fp, path, (suffix, _, kind) = tup
            if modpath:
                # Still more components to descent. Result must be a package
                if fp:
                    # The intermediate result is never read, release it now.
                    fp.close()
                if kind != imp.PKG_DIRECTORY:
                    LOG.debug('%r: %r appears to be child of non-package %r',
                              self, fullname, path)
                    return None
                search_path = [path]
            elif kind == imp.PKG_DIRECTORY:
                return self._found_package(fullname, path)
            else:
                return self._found_module(fullname, path, fp)
697
698
class ModuleFinder(object):
    """
    Given the name of a loaded module, make a best-effort attempt at finding
    related modules likely needed by a child context requesting the original
    module.
    """
    #: Finder strategies, tried in order by :meth:`get_module_source` until
    #: one of them returns a result.
    get_module_methods = [
        DefectivePython3xMainMethod(),
        PkgutilMethod(),
        SysModulesMethod(),
        ParentEnumerationMethod(),
    ]

    def __init__(self):
        #: Import machinery is expensive, keep :py:meth:`get_module_source`
        #: results around.
        self._found_cache = {}

        #: Avoid repeated dependency scanning, which is expensive.
        self._related_cache = {}

    def __repr__(self):
        return 'ModuleFinder()'

    def add_source_override(self, fullname, path, source, is_pkg):
        """
        Explicitly install a source cache entry, preventing usual lookup
        methods from being used.

        Beware the value of `path` is critical when `is_pkg` is specified,
        since it directs where submodules are searched for.

        :param str fullname:
            Name of the module to override.
        :param str path:
            Module's path as it will appear in the cache.
        :param bytes source:
            Module source code as a bytestring.
        :param bool is_pkg:
            :data:`True` if the module is a package.
        """
        self._found_cache[fullname] = (path, source, is_pkg)

    def get_module_source(self, fullname):
        """
        Given the name of a loaded module `fullname`, attempt to find its
        source code.

        Results, including failures, are cached.

        :returns:
            Tuple of `(module path, source text, is package?)`. All three
            elements are :data:`None` if the source cannot be found.
        """
        cached = self._found_cache.get(fullname)
        if cached:
            return cached

        found = None
        for method in self.get_module_methods:
            found = method.find(fullname)
            if found:
                break

        if not found:
            found = None, None, None
            LOG.debug('get_module_source(%r): cannot find source', fullname)

        self._found_cache[fullname] = found
        return found

    def resolve_relpath(self, fullname, level):
        """
        Given an ImportFrom AST node, guess the prefix that should be tacked on
        to an alias name to produce a canonical name. `fullname` is the name of
        the module in which the ImportFrom appears.

        :returns:
            Dotted prefix ending in ``.``, or the empty string for absolute
            imports and for relative imports that reach above the top level.
        """
        # Within a package's __init__, level 1 refers to the package itself,
        # so give the name an extra trailing component to strip.
        if hasattr(sys.modules.get(fullname, None), '__path__'):
            fullname += '.__init__'

        if level == 0 or not fullname:
            return ''

        parts = fullname.split('.')
        if level >= len(parts):
            # This would be an ImportError in real code.
            return ''

        return '.'.join(parts[:-level]) + '.'

    def generate_parent_names(self, fullname):
        """
        Yield the name of every parent package of `fullname`, nearest first.
        """
        while '.' in fullname:
            fullname = str_rpartition(to_text(fullname), u'.')[0]
            yield fullname

    def find_related_imports(self, fullname):
        """
        Return a list of non-stdlib modules that are directly imported by
        `fullname`, plus their parents.

        The list is determined by retrieving the source code of
        `fullname`, compiling it, and examining all IMPORT_NAME ops. Only
        names already present in :data:`sys.modules` are reported.

        :param fullname: Fully qualified name of an *already imported* module
            for which source code can be retrieved
        :type fullname: str
        """
        related = self._related_cache.get(fullname)
        if related is not None:
            return related

        modpath, src, _ = self.get_module_source(fullname)
        if src is None:
            return []

        candidates = list(self.generate_parent_names(fullname))

        code = compile(src, modpath, 'exec')
        for level, modname, namelist in scan_code_imports(code):
            if level == -1:
                # Implicit relative import: could be either spelling.
                modnames = [modname, '%s.%s' % (fullname, modname)]
            else:
                prefix = self.resolve_relpath(fullname, level)
                modnames = ['%s%s' % (prefix, modname)]

            candidates.extend(modnames)
            # Names imported "from" a module may themselves be submodules.
            for mname in modnames:
                for name in namelist:
                    candidates.append('%s.%s' % (mname, name))

        related = sorted(set(
            mitogen.core.to_text(name)
            for name in candidates
            if sys.modules.get(name) is not None
            and not is_stdlib_name(name)
            and u'six.moves' not in name  # TODO: crap
        ))
        return self._related_cache.setdefault(fullname, related)

    def find_related(self, fullname):
        """
        Return a list of non-stdlib modules that are imported directly or
        indirectly by `fullname`, plus their parents.

        This method is like :py:meth:`find_related_imports`, but also
        recursively searches any modules which are imported by `fullname`.

        :param fullname: Fully qualified name of an *already imported* module
            for which source code can be retrieved
        :type fullname: str
        """
        pending = [fullname]
        found = set()

        while pending:
            current = pending.pop(0)
            names = self.find_related_imports(current)
            # Only queue names that are neither processed nor already queued.
            seen = set(found).union(pending)
            pending.extend(set(names).difference(seen))
            found.update(names)

        found.discard(fullname)
        return sorted(found)
863
864
class ModuleResponder(object):
    """
    Serve Python module source to other contexts.

    A handler for :data:`mitogen.core.GET_MODULE` is registered on the
    supplied router. Each request is answered with
    :data:`mitogen.core.LOAD_MODULE` messages built from source located by a
    private :class:`ModuleFinder`. Built responses are cached by module name,
    and counters describing the work done are kept as public attributes.

    :param router:
        Router on which the handler is registered and through which
        responses are sent.
    """
    def __init__(self, router):
        self._log = logging.getLogger('mitogen.responder')
        self._router = router
        self._finder = ModuleFinder()
        self._cache = {}  # fullname -> pickled
        self.blacklist = []
        self.whitelist = ['']

        #: Context -> set([fullname, ..])
        self._forwarded_by_context = {}

        #: Number of GET_MODULE messages received.
        self.get_module_count = 0
        #: Total time spent in uncached GET_MODULE.
        self.get_module_secs = 0.0
        #: Total time spent minifying modules.
        self.minify_secs = 0.0
        #: Number of successful LOAD_MODULE messages sent.
        self.good_load_module_count = 0
        #: Total bytes in successful LOAD_MODULE payloads.
        self.good_load_module_size = 0
        #: Number of negative LOAD_MODULE messages sent.
        self.bad_load_module_count = 0

        router.add_handler(
            fn=self._on_get_module,
            handle=mitogen.core.GET_MODULE,
        )

    def __repr__(self):
        return 'ModuleResponder'

    def add_source_override(self, fullname, path, source, is_pkg):
        """
        See :meth:`ModuleFinder.add_source_override`.
        """
        self._finder.add_source_override(fullname, path, source, is_pkg)

    MAIN_RE = re.compile(b(r'^if\s+__name__\s*==\s*.__main__.\s*:'), re.M)
    main_guard_msg = (
        "A child context attempted to import __main__, however the main "
        "module present in the master process lacks an execution guard. "
        "Update %r to prevent unintended execution, using a guard like:\n"
        "\n"
        "    if __name__ == '__main__':\n"
        "        # your code here.\n"
    )

    def whitelist_prefix(self, fullname):
        """
        Append `fullname` to :attr:`whitelist`. On first use the default
        ``['']`` whitelist is replaced by ``['mitogen']`` before appending.
        """
        if self.whitelist == ['']:
            self.whitelist = ['mitogen']
        self.whitelist.append(fullname)

    def blacklist_prefix(self, fullname):
        """
        Append `fullname` to :attr:`blacklist`.
        """
        self.blacklist.append(fullname)

    def neutralize_main(self, path, src):
        """
        Given the source for the __main__ module, try to find where it begins
        conditional execution based on a "if __name__ == '__main__'" guard, and
        remove any code after that point.

        :param path: Path of the main module, used only in the error message.
        :param src: Byte string containing the module source.
        :returns: `src` truncated just before the guard, or `src` unchanged
            when no guard exists but the source contains ``mitogen.main(``.
        :raises ImportError: Neither a guard nor ``mitogen.main(`` was found.
        """
        match = self.MAIN_RE.search(src)
        if match:
            return src[:match.start()]

        if b('mitogen.main(') in src:
            return src

        self._log.error(self.main_guard_msg, path)
        raise ImportError('refused')

    def _make_negative_response(self, fullname):
        """
        Return the LOAD_MODULE tuple used to report that `fullname` cannot be
        served: every field except the name is empty.
        """
        return (fullname, None, None, None, ())

    minify_safe_re = re.compile(b(r'\s+#\s*!mitogen:\s*minify_safe'))

    def _build_tuple(self, fullname):
        """
        Return the LOAD_MODULE payload tuple for `fullname`, building and
        caching it on first use.

        :returns: ``(fullname, pkg_present, path, compressed, related)``, or
            the negative response tuple when the module lives on a stdlib
            path or its source cannot be found.
        :raises ImportError: `fullname` is rejected by
            :func:`mitogen.core.is_blacklisted_import`, or it is ``__main__``
            and :meth:`neutralize_main` refuses it.
        """
        if fullname in self._cache:
            return self._cache[fullname]

        if mitogen.core.is_blacklisted_import(self, fullname):
            raise ImportError('blacklisted')

        path, source, is_pkg = self._finder.get_module_source(fullname)
        if path and is_stdlib_path(path):
            # Prevent loading of 2.x<->3.x stdlib modules! This costs one
            # RTT per hit, so a client-side solution is also required.
            self._log.debug('refusing to serve stdlib module %r', fullname)
            tup = self._make_negative_response(fullname)
            self._cache[fullname] = tup
            return tup

        if source is None:
            # TODO: make this .warning() or similar again once importer has its
            # own logging category.
            self._log.debug('could not find source for %r', fullname)
            tup = self._make_negative_response(fullname)
            self._cache[fullname] = tup
            return tup

        if self.minify_safe_re.search(source):
            # If the module contains a magic marker, it's safe to minify.
            t0 = mitogen.core.now()
            source = mitogen.minify.minimize_source(source).encode('utf-8')
            self.minify_secs += mitogen.core.now() - t0

        if is_pkg:
            pkg_present = get_child_modules(path)
            self._log.debug('%s is a package at %s with submodules %r',
                            fullname, path, pkg_present)
        else:
            pkg_present = None

        if fullname == '__main__':
            source = self.neutralize_main(path, source)
        compressed = mitogen.core.Blob(zlib.compress(source, 9))
        related = [
            to_text(name)
            for name in self._finder.find_related(fullname)
            if not mitogen.core.is_blacklisted_import(self, name)
        ]
        # 0:fullname 1:pkg_present 2:path 3:compressed 4:related
        tup = (
            to_text(fullname),
            pkg_present,
            to_text(path),
            compressed,
            related
        )
        self._cache[fullname] = tup
        return tup

    def _send_load_module(self, stream, fullname):
        """
        Send a LOAD_MODULE message for `fullname` to `stream`, unless the
        stream's protocol already lists it in `sent_modules`. Updates the
        good/bad counters according to whether the response was negative.
        """
        if fullname not in stream.protocol.sent_modules:
            tup = self._build_tuple(fullname)
            msg = mitogen.core.Message.pickled(
                tup,
                dst_id=stream.protocol.remote_id,
                handle=mitogen.core.LOAD_MODULE,
            )
            self._log.debug('sending %s (%.2f KiB) to %s',
                            fullname, len(msg.data) / 1024.0, stream.name)
            self._router._async_route(msg)
            stream.protocol.sent_modules.add(fullname)
            # tup[2] is the path, which is None only in negative responses.
            if tup[2] is not None:
                self.good_load_module_count += 1
                self.good_load_module_size += len(msg.data)
            else:
                self.bad_load_module_count += 1

    def _send_module_load_failed(self, stream, fullname):
        """
        Send a negative LOAD_MODULE response for `fullname` to `stream` and
        count it in :attr:`bad_load_module_count`.
        """
        self.bad_load_module_count += 1
        stream.protocol.send(
            mitogen.core.Message.pickled(
                self._make_negative_response(fullname),
                dst_id=stream.protocol.remote_id,
                handle=mitogen.core.LOAD_MODULE,
            )
        )

    def _send_module_and_related(self, stream, fullname):
        """
        Send `fullname` to `stream`, preceded by those of its related modules
        whose leading package is either `fullname` itself or has already been
        sent. Does nothing if `fullname` was already sent.

        Any exception raised while building or sending is logged at debug
        level and converted into a negative response for `fullname`.
        """
        if fullname in stream.protocol.sent_modules:
            return

        try:
            tup = self._build_tuple(fullname)
            for name in tup[4]:  # related
                parent, _, _ = str_partition(name, '.')
                if parent != fullname and parent not in stream.protocol.sent_modules:
                    # Parent hasn't been sent, so don't load submodule yet.
                    continue

                self._send_load_module(stream, name)
            self._send_load_module(stream, fullname)
        except Exception:
            LOG.debug('While importing %r', fullname, exc_info=True)
            self._send_module_load_failed(stream, fullname)

    def _on_get_module(self, msg):
        """
        Handle a GET_MODULE message: decode the requested module name, send
        it and its related modules to the requesting stream, and account for
        the time spent in :attr:`get_module_secs`.

        Dead messages, and messages whose source has no known stream, are
        ignored.
        """
        if msg.is_dead:
            return

        stream = self._router.stream_by_id(msg.src_id)
        if stream is None:
            return

        fullname = msg.data.decode()
        self._log.debug('%s requested module %s', stream.name, fullname)
        self.get_module_count += 1
        if fullname in stream.protocol.sent_modules:
            LOG.warning('_on_get_module(): dup request for %r from %r',
                        fullname, stream)

        t0 = mitogen.core.now()
        try:
            self._send_module_and_related(stream, fullname)
        finally:
            self.get_module_secs += mitogen.core.now() - t0

    def _send_forward_module(self, stream, context, fullname):
        """
        Send a FORWARD_MODULE message naming `context` and `fullname` to
        `stream`, unless the stream's remote end is `context` itself.
        """
        if stream.protocol.remote_id != context.context_id:
            stream.protocol._send(
                mitogen.core.Message(
                    data=b('%s\x00%s' % (context.context_id, fullname)),
                    handle=mitogen.core.FORWARD_MODULE,
                    dst_id=stream.protocol.remote_id,
                )
            )

    def _forward_one_module(self, context, fullname):
        """
        Send `fullname` and each of its parent packages, outermost first,
        towards `context`, then record them as forwarded so later requests
        for the same context and name return immediately.

        Nothing is sent or recorded if no stream is known for `context`.
        """
        forwarded = self._forwarded_by_context.get(context)
        if forwarded is None:
            forwarded = set()
            self._forwarded_by_context[context] = forwarded

        if fullname in forwarded:
            return

        # Remember the requested name for logging; `fullname` is consumed
        # by the loop below, and `path` is empty for an empty name.
        requested = fullname
        path = []
        while fullname:
            path.append(fullname)
            fullname, _, _ = str_rpartition(fullname, u'.')

        stream = self._router.stream_by_id(context.context_id)
        if stream is None:
            LOG.debug('%r: dropping forward of %s to no longer existent '
                      '%r', self, requested, context)
            return

        for fullname in reversed(path):
            self._send_module_and_related(stream, fullname)
            self._send_forward_module(stream, context, fullname)
            # Populate the per-context set consulted at the top of this
            # method; without this the duplicate check can never succeed.
            forwarded.add(fullname)

    def _forward_modules(self, context, fullnames):
        """
        Forward each name in `fullnames` to `context` via
        :meth:`_forward_one_module`, converting each to text first.
        """
        IOLOG.debug('%r._forward_modules(%r, %r)', self, context, fullnames)
        for fullname in fullnames:
            self._forward_one_module(context, mitogen.core.to_text(fullname))

    def forward_modules(self, context, fullnames):
        """
        Schedule :meth:`_forward_modules` for `context` and `fullnames` via
        the router's broker ``defer()``.
        """
        self._router.broker.defer(self._forward_modules, context, fullnames)
1107
1108
class Broker(mitogen.core.Broker):
    """
    .. note::

        You may construct as many brokers as desired, and use the same broker
        for multiple routers, however usually only one broker need exist.
        Multiple brokers may be useful when dealing with sets of children with
        differing lifetimes. For example, a subscription service where
        non-payment results in termination for one customer.

    :param bool install_watcher:
        If :data:`True`, an additional thread is started to monitor the
        lifetime of the main thread, triggering :meth:`shutdown`
        automatically in case the user forgets to call it, or their code
        crashed.

        You should not rely on this functionality in your program, it is only
        intended as a fail-safe and to simplify the API for new users. In
        particular, alternative Python implementations may not be able to
        support watching the main thread.
    """
    shutdown_timeout = 5.0
    _watcher = None
    poller_class = mitogen.parent.PREFERRED_POLLER

    def __init__(self, install_watcher=True):
        if install_watcher:
            # threading.currentThread() is a deprecated alias of
            # threading.current_thread(); prefer the modern name and only
            # fall back where the threading module does not provide it.
            current_thread = getattr(threading, 'current_thread', None)
            if current_thread is None:
                current_thread = threading.currentThread
            # Watch the thread that constructs the broker, so shutdown() is
            # triggered when that thread is joined.
            self._watcher = ThreadWatcher.watch(
                target=current_thread(),
                on_join=self.shutdown,
            )
        super(Broker, self).__init__()
        self.timers = mitogen.parent.TimerList()

    def shutdown(self):
        """
        Shut the broker down via the base class, then remove the thread
        watcher if one was installed.
        """
        super(Broker, self).shutdown()
        if self._watcher:
            self._watcher.remove()
1148
class Router(mitogen.parent.Router):
    """
    Extend :class:`mitogen.parent.Router` with functionality useful to
    masters, and child contexts who later become masters. Currently when this
    class is required, the target context's router is upgraded at runtime.

    .. note::

        You may construct as many routers as desired, and use the same broker
        for multiple routers, however usually only one broker and router need
        exist. Multiple routers may be useful when dealing with separate trust
        domains, for example, manipulating infrastructure belonging to separate
        customers or projects.

    :param mitogen.master.Broker broker:
        Broker to use. If not specified, a private :class:`Broker` is created.

    :param int max_message_size:
        Override the maximum message size this router is willing to receive or
        transmit. Any value set here is automatically inherited by any children
        created by the router.

        This has a liberal default of 128 MiB, but may be set much lower.
        Beware that setting it below 64KiB may encourage unexpected failures as
        parents and children can no longer route large Python modules that may
        be required by your application.
    """

    broker_class = Broker

    #: When :data:`True`, cause the broker thread and any subsequent broker and
    #: main threads existing in any child to write
    #: ``/tmp/mitogen.stats.<pid>.<thread_name>.log`` containing a
    #: :mod:`cProfile` dump on graceful exit. Must be set prior to construction
    #: of any :class:`Broker`, e.g. via::
    #:
    #:      mitogen.master.Router.profiling = True
    profiling = os.environ.get('MITOGEN_PROFILING') is not None

    def __init__(self, broker=None, max_message_size=None):
        if broker is None:
            broker = self.broker_class()
        # A falsy value (None or 0) leaves the inherited limit untouched.
        if max_message_size:
            self.max_message_size = max_message_size
        super(Router, self).__init__(broker)
        self.upgrade()

    def upgrade(self):
        """
        Install the master-only services on this router: ID allocation,
        module serving, log forwarding, route monitoring, and the
        :data:`mitogen.core.DETACHING` handler.
        """
        self.id_allocator = IdAllocator(self)
        self.responder = ModuleResponder(self)
        self.log_forwarder = LogForwarder(self)
        self.route_monitor = mitogen.parent.RouteMonitor(router=self)
        self.add_handler(  # TODO: cutpaste.
            fn=self._on_detaching,
            handle=mitogen.core.DETACHING,
            persist=True,
        )

    def _on_broker_exit(self):
        """
        Run the base class exit handling, then log a one-line summary of the
        module responder statistics at debug level.
        """
        super(Router, self)._on_broker_exit()
        dct = self.get_stats()
        dct['self'] = self
        dct['minify_ms'] = 1000 * dct['minify_secs']
        dct['get_module_ms'] = 1000 * dct['get_module_secs']
        dct['good_load_module_size_kb'] = dct['good_load_module_size'] / 1024.0
        # "or 1.0" avoids dividing by zero when no module was ever sent.
        dct['good_load_module_size_avg'] = (
            (
                dct['good_load_module_size'] /
                (float(dct['good_load_module_count']) or 1.0)
            ) / 1024.0
        )

        # The mapping is passed as the sole logging argument so the message
        # is only formatted if a handler actually emits the record.
        LOG.debug(
            '%(self)r: stats: '
            '%(get_module_count)d module requests in '
            '%(get_module_ms)d ms, '
            '%(good_load_module_count)d sent '
            '(%(minify_ms)d ms minify time), '
            '%(bad_load_module_count)d negative responses. '
            'Sent %(good_load_module_size_kb).01f kb total, '
            '%(good_load_module_size_avg).01f kb avg.',
            dct
        )

    def get_stats(self):
        """
        Return performance data for the module responder.

        :returns:

            Dict containing keys:

            * `get_module_count`: Integer count of
              :data:`mitogen.core.GET_MODULE` messages received.
            * `get_module_secs`: Floating point total seconds spent servicing
              :data:`mitogen.core.GET_MODULE` requests.
            * `good_load_module_count`: Integer count of successful
              :data:`mitogen.core.LOAD_MODULE` messages sent.
            * `good_load_module_size`: Integer total bytes sent in
              :data:`mitogen.core.LOAD_MODULE` message payloads.
            * `bad_load_module_count`: Integer count of negative
              :data:`mitogen.core.LOAD_MODULE` messages sent.
            * `minify_secs`: CPU seconds spent minifying modules marked
               minify-safe.
        """
        return {
            'get_module_count': self.responder.get_module_count,
            'get_module_secs': self.responder.get_module_secs,
            'good_load_module_count': self.responder.good_load_module_count,
            'good_load_module_size': self.responder.good_load_module_size,
            'bad_load_module_count': self.responder.bad_load_module_count,
            'minify_secs': self.responder.minify_secs,
        }

    def enable_debug(self):
        """
        Cause this context and any descendant child contexts to write debug
        logs to ``/tmp/mitogen.<pid>.log``.
        """
        mitogen.core.enable_debug_logging()
        self.debug = True

    def __enter__(self):
        return self

    def __exit__(self, e_type, e_val, tb):
        # Leaving the "with" block shuts the broker down and waits for it,
        # whether or not an exception occurred; exceptions are not suppressed.
        self.broker.shutdown()
        self.broker.join()

    def disconnect_stream(self, stream):
        """
        Schedule `stream.on_disconnect(broker)` via the broker's ``defer()``.
        """
        self.broker.defer(stream.on_disconnect, self.broker)

    def disconnect_all(self):
        """
        Schedule disconnection of every stream currently registered with the
        router.
        """
        # Iterate over a snapshot so the loop is unaffected if the mapping
        # changes while disconnects are being scheduled.
        # NOTE(review): on_disconnect presumably removes streams from
        # _stream_by_id on the broker thread -- confirm.
        for stream in list(self._stream_by_id.values()):
            self.disconnect_stream(stream)
1284
1285
class IdAllocator(object):
    """
    Hand out context IDs: single IDs for contexts constructed locally, and
    whole blocks of IDs that children can draw from using
    :class:`mitogen.parent.ChildIdAllocator`, so they neither conflict with
    each other nor need a network round-trip for every new context.

    A handler for :data:`mitogen.core.ALLOCATE_ID` is registered on the
    router; each such message received from a child is answered with a fresh
    block allocation.

    The master's :class:`IdAllocator` instance can be accessed via
    :attr:`mitogen.master.Router.id_allocator`.
    """
    #: Block allocations are made in groups of 1000 by default.
    BLOCK_SIZE = 1000

    def __init__(self, router):
        self.lock = threading.Lock()
        self.next_id = 1
        self.router = router
        router.add_handler(
            fn=self.on_allocate_id,
            handle=mitogen.core.ALLOCATE_ID,
        )

    def __repr__(self):
        return 'IdAllocator(%r)' % (self.router,)

    def allocate(self):
        """
        Allocate a single context ID by advancing the internal counter while
        holding the allocator lock.

        :returns:
            The new context ID.
        """
        lock = self.lock
        lock.acquire()
        try:
            allocated = self.next_id
            self.next_id = allocated + 1
        finally:
            lock.release()
        return allocated

    def allocate_block(self):
        """
        Reserve :attr:`BLOCK_SIZE` consecutive IDs for use in a child
        context.

        The counter is advanced while holding the allocator lock.

        :returns:
            Tuple of the form `(id, end_id)` where `id` is the first usable ID
            and `end_id` is one past the last usable ID, i.e. the block is the
            half-open range ``[id, end_id)`` and `end_id` is where the next
            allocation begins.
        """
        size = self.BLOCK_SIZE
        lock = self.lock
        lock.acquire()
        try:
            first = self.next_id
            limit = first + size
            self.next_id = limit
            LOG.debug('%r: allocating [%d..%d)', self, first, limit)
            return first, limit
        finally:
            lock.release()

    def on_allocate_id(self, msg):
        """
        Handle an :data:`mitogen.core.ALLOCATE_ID` message by replying with a
        freshly allocated `(id, end_id)` block. Dead messages are ignored.
        """
        if msg.is_dead:
            return

        first, limit = self.allocate_block()
        # The requesting context is looked up only for the log line.
        requester = self.router.context_by_id(msg.src_id)
        LOG.debug('%r: allocating [%r..%r) to %r',
                  self, first, limit, requester)
        msg.reply((first, limit))
1358