1###############################################################################
2# Prepares and processes the data to setup the new process environment
3#
4# author: Thomas Moreau and Olivier Grisel
5#
6# adapted from multiprocessing/spawn.py (17/02/2017)
7#  * Improve logging data
8#
9import os
10import sys
11import runpy
12import types
13from multiprocessing import process, util
14
15
# Windows-only helpers and flags. ``msvcrt`` and ``duplicate`` are imported
# only on win32, where the resource tracker handle is passed to the child
# as an OS handle instead of a file descriptor.
if sys.platform == 'win32':
    import msvcrt
    from .reduction import duplicate
    # Truthy when the interpreter reports itself as frozen
    WINEXE = getattr(sys, 'frozen', False)
    # True when the running executable is named like pythonservice.exe
    WINSERVICE = sys.executable.lower().endswith("pythonservice.exe")
else:
    WINEXE = False
    WINSERVICE = False

# Executable reported by get_executable(): when running as a Windows
# service, point at python.exe under the exec prefix rather than at the
# service executable itself.
_python_exe = (
    os.path.join(sys.exec_prefix, 'python.exe')
    if WINSERVICE
    else sys.executable
)
29
30
def get_executable():
    '''
    Return the path of the Python executable stored in ``_python_exe``

    ``_python_exe`` is computed once at import time: ``python.exe`` under
    ``sys.exec_prefix`` when ``WINSERVICE`` is true, ``sys.executable``
    otherwise.
    '''
    return _python_exe
33
34
def _check_not_importing_main():
    '''
    Raise RuntimeError if the current process is still bootstrapping

    The current process object is considered to be bootstrapping when its
    ``_inheriting`` attribute is truthy; a missing attribute counts as
    False. Returns None when the check passes.
    '''
    still_bootstrapping = getattr(
        process.current_process(), '_inheriting', False)
    if not still_bootstrapping:
        return

    raise RuntimeError('''
        An attempt has been made to start a new process before the
        current process has finished its bootstrapping phase.

        This probably means that you are not using fork to start your
        child processes and you have forgotten to use the proper idiom
        in the main module:

            if __name__ == '__main__':
                freeze_support()
                ...

        The "freeze_support()" line can be omitted if the program
        is not going to be frozen to produce an executable.''')
51
52
def get_preparation_data(name, init_main_module=True):
    '''
    Return info about parent needed by child to unpickle process object

    Parameters:
        name: value stored under the ``'name'`` key; ``prepare`` assigns it
            to the name of the child's current process.
        init_main_module: when true (the default), also record how the
            child should re-create ``__main__``, either from a module name
            (``'init_main_from_name'``) or from a script path
            (``'init_main_from_path'`` / ``'main_path'``).

    Returns:
        A dict with the keys ``log_to_stderr``, ``authkey``, ``name``,
        ``sys_argv``, ``orig_dir``, ``dir``, ``sys_path`` and
        ``tracker_args``, plus, depending on the state of the parent,
        ``log_level``, ``log_fmt``, ``mp_tracker_args`` and the main module
        keys described above.

    Raises:
        RuntimeError: propagated from ``_check_not_importing_main`` when
            the current process is still bootstrapping.
    '''
    _check_not_importing_main()
    d = dict(
        log_to_stderr=util._log_to_stderr,
        authkey=bytes(process.current_process().authkey),
        name=name,
        sys_argv=sys.argv,
        orig_dir=process.ORIGINAL_DIR,
        dir=os.getcwd()
    )

    # Send sys_path and make sure the current directory will not be changed:
    # the first '' entry (if any) is replaced by the original directory.
    sys_path = list(sys.path)
    try:
        i = sys_path.index('')
    except ValueError:
        pass
    else:
        sys_path[i] = process.ORIGINAL_DIR
    d['sys_path'] = sys_path

    # Make sure to pass the information if the multiprocessing logger is active
    if util._logger is not None:
        d['log_level'] = util._logger.getEffectiveLevel()
        if util._logger.handlers:
            h = util._logger.handlers[0]
            # A handler that was never given a formatter has
            # ``formatter`` set to None: only forward a format string when
            # there is one instead of failing with AttributeError.
            log_fmt = getattr(h.formatter, '_fmt', None)
            if log_fmt is not None:
                d['log_fmt'] = log_fmt

    # Tell the child how to communicate with the resource_tracker
    from .resource_tracker import _resource_tracker
    _resource_tracker.ensure_running()
    d["tracker_args"] = {"pid": _resource_tracker._pid}
    if sys.platform == "win32":
        # On Windows the child receives an inheritable duplicate of the OS
        # handle behind the tracker file descriptor.
        child_w = duplicate(
            msvcrt.get_osfhandle(_resource_tracker._fd), inheritable=True)
        d["tracker_args"]["fh"] = child_w
    else:
        d["tracker_args"]["fd"] = _resource_tracker._fd

    if sys.version_info >= (3, 8) and os.name == 'posix':
        # joblib/loky#242: allow loky processes to retrieve the resource
        # tracker of their parent in case the child process unpickles
        # shared_memory objects, that are still tracked by multiprocessing's
        # resource_tracker by default.
        # XXX: this is a workaround that may be error prone: in the future, it
        # would be better to have loky subclass multiprocessing's shared_memory
        # to force registration of shared_memory segments via loky's
        # resource_tracker.
        from multiprocessing.resource_tracker import (
            _resource_tracker as mp_resource_tracker
        )
        # multiprocessing's resource_tracker must be running before loky
        # process is created (otherwise the child won't be able to use it if
        # it is created later on)
        mp_resource_tracker.ensure_running()
        d["mp_tracker_args"] = {
            'fd': mp_resource_tracker._fd, 'pid': mp_resource_tracker._pid
        }

    # Figure out whether to initialise main in the subprocess as a module
    # or through direct execution (or to leave it alone entirely)
    if init_main_module:
        main_module = sys.modules['__main__']
        try:
            main_mod_name = getattr(main_module.__spec__, "name", None)
        except Exception:
            # Best effort: a __main__ without a usable __spec__ is treated
            # as having no module name. KeyboardInterrupt and SystemExit
            # are deliberately not swallowed here.
            main_mod_name = None
        if main_mod_name is not None:
            d['init_main_from_name'] = main_mod_name
        elif sys.platform != 'win32' or (not WINEXE and not WINSERVICE):
            main_path = getattr(main_module, '__file__', None)
            if main_path is not None:
                # Relative script paths are resolved against the directory
                # the parent was started from, when it is known.
                if (not os.path.isabs(main_path) and
                        process.ORIGINAL_DIR is not None):
                    main_path = os.path.join(process.ORIGINAL_DIR, main_path)
                d['init_main_from_path'] = os.path.normpath(main_path)
                # Compat for python2.7
                d['main_path'] = d['init_main_from_path']

    return d
136
137
138#
139# Prepare current process
140#
# Modules that were registered as ``__main__`` before one of the
# ``_fixup_main_from_*`` helpers replaced them: the helpers append the
# previous module here, so a reference to it is kept.
old_main_modules = []
142
143
def prepare(data):
    '''
    Try to get current process ready to unpickle process object

    ``data`` is a mapping such as the one built by ``get_preparation_data``.
    Every key is optional; only the keys that are present are applied:

    * ``name`` / ``authkey``: set on the current process object.
    * ``log_to_stderr`` / ``log_level`` / ``log_fmt``: configure the
      multiprocessing logger. ``log_fmt`` is applied to the first handler
      of that logger and ignored when the logger has no handler.
    * ``sys_path`` / ``sys_argv``: replace ``sys.path`` and ``sys.argv``.
    * ``dir``: becomes the current working directory.
    * ``orig_dir``: stored in ``process.ORIGINAL_DIR``.
    * ``mp_tracker_args`` / ``tracker_args``: set the ``_fd`` and ``_pid``
      of the multiprocessing and loky resource trackers.
    * ``init_main_from_name`` / ``init_main_from_path``: re-create
      ``__main__``; the module name takes precedence over the path.

    Returns None.
    '''
    if 'name' in data:
        process.current_process().name = data['name']

    if 'authkey' in data:
        process.current_process().authkey = data['authkey']

    if 'log_to_stderr' in data and data['log_to_stderr']:
        util.log_to_stderr()

    if 'log_level' in data:
        util.get_logger().setLevel(data['log_level'])

    if 'log_fmt' in data:
        import logging
        handlers = util.get_logger().handlers
        # The parent sends the format of its first handler even when
        # log_to_stderr is off, in which case the logger of this process
        # may have no handler at all: there is then nothing to format, and
        # indexing handlers[0] would raise IndexError.
        if handlers:
            handlers[0].setFormatter(logging.Formatter(data['log_fmt']))

    if 'sys_path' in data:
        sys.path = data['sys_path']

    if 'sys_argv' in data:
        sys.argv = data['sys_argv']

    if 'dir' in data:
        os.chdir(data['dir'])

    if 'orig_dir' in data:
        process.ORIGINAL_DIR = data['orig_dir']

    if 'mp_tracker_args' in data:
        from multiprocessing.resource_tracker import (
            _resource_tracker as mp_resource_tracker
        )
        mp_resource_tracker._fd = data['mp_tracker_args']['fd']
        mp_resource_tracker._pid = data['mp_tracker_args']['pid']
    if 'tracker_args' in data:
        from .resource_tracker import _resource_tracker
        _resource_tracker._pid = data["tracker_args"]['pid']
        if sys.platform == 'win32':
            # The parent sent an OS handle: turn it back into a file
            # descriptor usable by the tracker.
            handle = data["tracker_args"]["fh"]
            _resource_tracker._fd = msvcrt.open_osfhandle(handle, 0)
        else:
            _resource_tracker._fd = data["tracker_args"]["fd"]

    if 'init_main_from_name' in data:
        _fixup_main_from_name(data['init_main_from_name'])
    elif 'init_main_from_path' in data:
        _fixup_main_from_path(data['init_main_from_path'])
197
198
199# Multiprocessing module helpers to fix up the main module in
200# spawned subprocesses
def _fixup_main_from_name(mod_name):
    '''
    Re-run module ``mod_name`` as ``__mp_main__`` and alias it to ``__main__``

    Nothing is done when ``mod_name`` designates a ``__main__`` module or
    when the spec of the current ``__main__`` already has that name.
    Otherwise the previous ``__main__`` is appended to ``old_main_modules``
    and both ``sys.modules['__main__']`` and ``sys.modules['__mp_main__']``
    are pointed at a new module holding the globals produced by running
    ``mod_name``.
    '''
    previous_main = sys.modules['__main__']

    # __main__.py files for packages, directories, zip archives, etc, run
    # their "main only" code unconditionally, so we don't even try to
    # populate anything in __main__, nor do we make any changes to
    # __main__ attributes
    is_main_only = mod_name == "__main__" or mod_name.endswith(".__main__")
    if is_main_only:
        return

    # If this process was forked, __main__ may already be populated
    current_name = getattr(previous_main.__spec__, "name", None)
    if current_name == mod_name:
        return

    # Otherwise, __main__ may contain some non-main code where we need to
    # support unpickling it properly. We rerun it as __mp_main__ and make
    # the normal __main__ an alias to that
    old_main_modules.append(previous_main)
    module_globals = runpy.run_module(
        mod_name, run_name="__mp_main__", alter_sys=True)
    mp_main = types.ModuleType("__mp_main__")
    mp_main.__dict__.update(module_globals)
    sys.modules['__main__'] = mp_main
    sys.modules['__mp_main__'] = mp_main
224
225
def _fixup_main_from_path(main_path):
    '''
    Re-run the script at ``main_path`` as ``__mp_main__`` and alias it to
    ``__main__``

    Nothing is done when the script is named ``ipython`` or when the
    ``__file__`` of the current ``__main__`` already equals ``main_path``.
    Otherwise the previous ``__main__`` is appended to ``old_main_modules``
    and both ``sys.modules['__main__']`` and ``sys.modules['__mp_main__']``
    are pointed at a new module holding the globals produced by running
    the script.
    '''
    # If this process was forked, __main__ may already be populated
    existing_main = sys.modules['__main__']

    # Unfortunately, the main ipython launch script historically had no
    # "if __name__ == '__main__'" guard, so we work around that
    # by treating it like a __main__.py file
    # See https://github.com/ipython/ipython/issues/4698
    script_name, _ = os.path.splitext(os.path.basename(main_path))

    # Besides the ipython case, there is nothing more to do if __file__
    # already has the setting we expect
    if (script_name == 'ipython' or
            getattr(existing_main, '__file__', None) == main_path):
        return

    # If the parent process has sent a path through rather than a module
    # name we assume it is an executable script that may contain
    # non-main code that needs to be executed
    old_main_modules.append(existing_main)
    script_globals = runpy.run_path(main_path, run_name="__mp_main__")
    mp_main = types.ModuleType("__mp_main__")
    mp_main.__dict__.update(script_globals)
    sys.modules['__main__'] = mp_main
    sys.modules['__mp_main__'] = mp_main
252
253
def import_main_path(main_path):
    '''
    Set sys.modules['__main__'] to module at main_path

    Thin wrapper that forwards ``main_path`` to ``_fixup_main_from_path``,
    which returns without changing anything when the script is named
    ``ipython`` or when the ``__file__`` of the current ``__main__``
    already equals ``main_path``.
    '''
    _fixup_main_from_path(main_path)
259