1#-----------------------------------------------------------------------------
2# Copyright (c) 2005-2019, PyInstaller Development Team.
3#
4# Distributed under the terms of the GNU General Public License with exception
5# for distributing bootloader.
6#
7# The full license is in the file COPYING.txt, distributed with this software.
8#-----------------------------------------------------------------------------
9import copy
10import glob
11import os
12import pkg_resources
13import pkgutil
14import sys
15import textwrap
16
17from ...compat import base_prefix, exec_command_stdout, exec_python, \
18    is_darwin, is_py2, is_py3, is_venv, string_types, open_file, \
19    EXTENSION_SUFFIXES, ALL_SUFFIXES
20from ... import HOMEPATH
21from ... import log as logging
22from ...exceptions import ExecCommandFailed
23
# Module-level logger used by the helper functions in this file.
logger = logging.getLogger(__name__)

# These extensions represent Python executables and should therefore be
# ignored when collecting data files: `collect_data_files()` and
# `collect_system_data_files()` skip every file whose `os.path.splitext()`
# extension is a member of this set, unless `include_py_files` is true.
# NOTE: .dylib files are not Python executable and should not be in this list.
PY_IGNORE_EXTENSIONS = set(ALL_SUFFIXES)

# Some hooks need to save some values. This is the dict that can be used for
# that.
#
# When running tests this variable should be reset before every test.
#
# For example the 'wx' module needs variable 'wxpubsub'. This tells PyInstaller
# which protocol of the wx module should be bundled.
hook_variables = {}
39
40
def __exec_python_cmd(cmd, env=None):
    """
    Run ``cmd`` in an externally spawned Python interpreter and return
    everything it wrote to standard output as one string, with leading
    and trailing whitespace stripped.

    :param cmd: List of command-line arguments for the interpreter, e.g.
        ``['-c', statement]`` or ``[script_filename, arg, ...]``.
    :param env: Optional mapping of environment variables layered on top
        of a copy of ``os.environ`` for the child process.
    """
    # 'PyInstaller.config' cannot be imported as other top-level modules.
    from ...config import CONF

    # Start from the current environment and overlay the caller's values.
    # NOTE(review): the deep copy may still be an ``os.environ``-like object
    # whose item assignment reaches the real process environment -- confirm
    # whether a plain ``os.environ.copy()`` dict would be preferable.
    child_env = copy.deepcopy(os.environ)
    if env is not None:
        child_env.update(env)

    # The child must be able to import both the user's code ('pathex') and
    # PyInstaller itself, because some subprocess snippets use PyInstaller
    # code.
    search_path = os.pathsep.join(CONF['pathex'] + [HOMEPATH])

    # On Python 2, `os.environ` may only contain bytes, so unicode filenames
    # are encoded using the filesystem encoding.
    # TODO: `os.environ` wrapper that encodes automatically?
    if is_py2 and isinstance(search_path, unicode):
        search_path = search_path.encode(sys.getfilesystemencoding())

    # A PYTHONPATH that already exists (from 'env' or from the inherited
    # environment) is kept in front of the entries added here.
    if 'PYTHONPATH' in child_env:
        search_path = os.pathsep.join([child_env['PYTHONPATH'], search_path])
    child_env['PYTHONPATH'] = search_path

    return exec_python(*cmd, env=child_env).strip()
74
75
def exec_statement(statement):
    """
    Run a Python statement in an externally spawned interpreter and return
    whatever it printed to standard output as a single string.

    The statement is passed through ``textwrap.dedent`` first, so callers may
    indent multi-line snippets to match the surrounding code.
    """
    return __exec_python_cmd(['-c', textwrap.dedent(statement)])
84
85
def exec_script(script_filename, env=None, *args):
    """
    Run a Python script in an externally spawned interpreter and return
    whatever it printed to standard output as a single string.

    Only the base name of ``script_filename`` is used; the script is always
    looked up in the ``subproc`` directory next to this file. This prevents
    misuse: scripts passed to utils.hooks.exec_script must be located in the
    `PyInstaller/utils/hooks/subproc` directory, otherwise ``SystemError`` is
    raised.

    :param script_filename: Name of the script inside the ``subproc`` directory.
    :param env: Optional mapping of extra environment variables for the child.
    :param args: Additional command-line arguments passed to the script.
    """
    subproc_dir = os.path.join(os.path.dirname(__file__), 'subproc')
    script_path = os.path.join(subproc_dir, os.path.basename(script_filename))

    if os.path.exists(script_path):
        return __exec_python_cmd([script_path] + list(args), env=env)

    raise SystemError("To prevent misuse, the script passed to "
                      "PyInstaller.utils.hooks.exec_script must be located "
                      "in the `PyInstaller/utils/hooks/subproc` directory.")
105
106
def eval_statement(statement):
    """
    Run ``statement`` through ``exec_statement`` and evaluate the text it
    printed as a Python expression.

    If nothing was printed, an empty string is returned: it is "not true"
    yet still iterable, so callers can both test and loop over the result.
    """
    output = exec_statement(statement).strip()
    return eval(output) if output else ''
113
114
def eval_script(scriptfilename, env=None, *args):
    """
    Run a script through ``exec_script`` and evaluate the text it printed as
    a Python expression.

    :param scriptfilename: Name of the script, resolved by ``exec_script``.
    :param env: Optional mapping of extra environment variables for the child.
    :param args: Additional command-line arguments passed to the script.
    :return: The evaluated output, or an empty string (which is "not true"
        but iterable) if the script printed nothing.
    """
    # ``exec_script`` is declared as ``(script_filename, env=None, *args)``,
    # so ``env`` has to be passed positionally ahead of the extra arguments.
    # Passing ``*args`` first and ``env`` by keyword would bind the first
    # extra argument to ``env`` and raise ``TypeError`` for the duplicate.
    txt = exec_script(scriptfilename, env, *args).strip()
    if not txt:
        # return an empty string which is "not true" but iterable
        return ''
    return eval(txt)
121
122
def get_pyextension_imports(modname):
    """
    Return the list of modules that a binary (C/C++) Python extension pulls
    in when it is imported.

    Extension files (``.so`` on Unix, ``.pyd`` on Windows) are almost
    impossible to analyze statically, and the module cannot be imported
    directly in this process. Instead the module is imported in a subprocess
    and the names newly added to ``sys.modules`` are reported.

    The result is suitable for ``hiddenimports`` in PyInstaller hook files.
    An empty list means no imports were found or the lookup failed.
    """
    statement = """
        import sys
        # Importing distutils filters common modules, especially in virtualenv.
        import distutils
        original_modlist = set(sys.modules.keys())
        # When importing this module - sys.modules gets updated.
        import %(modname)s
        all_modlist = set(sys.modules.keys())
        diff = all_modlist - original_modlist
        # Module list contain original modname. We do not need it there.
        diff.discard('%(modname)s')
        # Print module list to stdout.
        print(list(diff))
    """ % {'modname': modname}

    discovered = eval_statement(statement)
    if discovered:
        return discovered

    logger.error('Cannot find imports for module %s' % modname)
    return []
159
160
def get_homebrew_path(formula=''):
    """
    Return the homebrew path to the requested formula, or the global prefix
    when called with no argument.

    :param formula: Name of the homebrew formula; empty for the global prefix.
    :return: The path as a string, or None if ``brew`` could not be run, the
        command failed, or it printed nothing.
    """
    import subprocess
    brewcmd = ['brew', '--prefix']
    if formula:
        brewcmd.append(formula)
        dbgstr = 'homebrew formula "%s"' % formula
    else:
        dbgstr = 'homebrew prefix'

    try:
        path = subprocess.check_output(brewcmd).strip()
    except OSError:
        # The 'brew' executable itself could not be started.
        logger.debug('Detected homebrew not installed')
        return None
    except subprocess.CalledProcessError:
        logger.debug('homebrew formula "%s" not installed' % formula)
        return None

    if not path:
        return None
    if is_py3:
        # Decode before logging so the message shows the path rather than
        # the repr of a bytes object.
        path = path.decode('utf8')  # OS X filenames are UTF-8
    logger.debug('Found %s at "%s"' % (dbgstr, path))
    return path
188
189
190# TODO Move to "hooks/hook-OpenGL.py", the only place where this is called.
def opengl_arrays_modules():
    """
    Return the list of array modules shipped in the ``OpenGL.arrays``
    package, e.g. ``'OpenGL.arrays.vbo'``.

    The OpenGL package directory is queried in a subprocess; every ``*.py``
    file in its ``arrays`` subdirectory except ``__init__`` is reported.
    """
    opengl_dir = exec_statement('import OpenGL; print(OpenGL.__path__[0])')
    pattern = os.path.join(opengl_dir, 'arrays') + '/*.py'
    stems = (os.path.splitext(os.path.basename(path))[0]
             for path in glob.glob(pattern))
    # The package's own __init__ module is not an array module.
    return ['OpenGL.arrays.' + stem for stem in stems if stem != '__init__']
211
212
def remove_prefix(string, prefix):
    """
    Return ``string`` with ``prefix`` stripped from its start.

    If ``string`` does not begin with ``prefix`` it is returned unmodified.
    """
    if not string.startswith(prefix):
        return string
    return string[len(prefix):]
223
224
def remove_suffix(string, suffix):
    """
    Return ``string`` with ``suffix`` stripped from its end.

    If ``string`` does not end with ``suffix`` it is returned unmodified.
    """
    # An empty suffix needs its own guard: string[:-0] is string[:0], which
    # would yield '' instead of the unmodified string.
    if not suffix or not string.endswith(suffix):
        return string
    return string[:-len(suffix)]
237
238
239# TODO: Do we really need a helper for this? This is pretty trivially obvious.
def remove_file_extension(filename):
    """
    Return ``filename`` without its extension.

    For Python C modules the whole extension suffix is removed, e.g. all of
    '.cpython-34m.so', not just the final '.so'.
    """
    # The first entry of EXTENSION_SUFFIXES that the filename ends with wins.
    suffix = next(
        (suff for suff in EXTENSION_SUFFIXES if filename.endswith(suff)),
        None)
    if suffix is None:
        # Fallback to ordinary 'splitext'.
        return os.path.splitext(filename)[0]
    return filename[:filename.rfind(suffix)]
251
252
253# TODO: Replace most calls to exec_statement() with calls to this function.
def get_module_attribute(module_name, attr_name):
    """
    Return the string value of an attribute of a module, or raise
    `AttributeError` if the module does not define that attribute.

    Modules cannot be imported directly during analysis, so a subprocess is
    spawned that imports the module and prints the attribute.

    Parameters
    ----------
    module_name : str
        Fully-qualified name of this module.
    attr_name : str
        Name of the attribute in this module to be retrieved.

    Returns
    ----------
    str
        String value of this attribute.

    Raises
    ----------
    AttributeError
        If this attribute is undefined.
    """
    # Marker printed by the subprocess when the attribute is missing. It is
    # meant to be obscure enough not to collide with real attribute values.
    missing_marker = '!)ABadCafe@(D15ea5e#*DeadBeef$&Fee1Dead%^'
    statement = """
        import %s as m
        print(getattr(m, %r, %r))
    """ % (module_name, attr_name, missing_marker)

    value = exec_statement(statement)
    if value != missing_marker:
        return value

    raise AttributeError(
        'Module %r has no attribute %r' % (module_name, attr_name))
294
295
def get_module_file_attribute(package):
    """
    Return the absolute path of the module with the passed name.

    The loader reported by `pkgutil` is asked for the filename first. If
    that fails, a subprocess imports the module and prints its `__file__`
    attribute, because modules *cannot* be imported directly during analysis.

    Parameters
    ----------
    package : str
        Fully-qualified name of this module.

    Returns
    ----------
    str
        Absolute path of this module.

    Raises
    ----------
    ImportError
        If neither approach yields a filename.
    """
    try:
        # Fastest approach, but it doesn't work on certain modules in
        # pywin32, which replace all module attributes with those of the
        # .dll. A missing loader (None) surfaces here as AttributeError.
        filename = pkgutil.find_loader(package).get_filename(package)
        # The built-in ``datetime`` module returns ``None``. Treat this as
        # an ``ImportError`` so the subprocess fallback runs.
        if not filename:
            raise ImportError
    except (AttributeError, ImportError):
        # Fallback: print the module's __file__ from a subprocess.
        file_statement = """
            import %s as p
            try:
                print(p.__file__)
            except:
                # If p lacks a file attribute, hide the exception.
                pass
        """
        filename = exec_statement(file_statement % package)
        if not filename.strip():
            raise ImportError
    return filename
339
340
def is_module_satisfies(requirements, version=None, version_attr='__version__'):
    """
    `True` if the module, package, or C extension described by the passed
    requirements string both exists and satisfies these requirements.

    Module versions and extras (i.e., optional install-time features) are
    checked via the same low-level algorithm leveraged by `easy_install` and
    `pip`. This function should _always_ be called in lieu of manual
    checking, which invites subtle issues, particularly when comparing
    versions lexicographically (e.g., `'00.5' > '0.6'` is `True`, despite
    being semantically untrue).

    Requirements
    ----------
    Typically the version of a currently installed module is compared with
    some desired version by passing a string of the form
    `{module_name} {comparison_operator} {version}` (e.g., `sphinx >= 1.3`)
    as the `requirements` parameter, where:

    * `{module_name}` is the fully-qualified name of the module, package, or C
      extension to be tested (e.g., `yaml`). This is _not_ a `setuptools`-
      specific distribution name (e.g., `PyYAML`).
    * `{comparison_operator}` is the numeric comparison to be performed. All
      numeric Python comparisons are supported (e.g., `!=`, `==`, `<`, `>=`).
    * `{version}` is the desired PEP 0440-compliant version (e.g., `3.14-rc5`)
      to be compared against the current version of this module.

    Multiple versions and/or extras may also be tested by passing a string in
    the format accepted by `pkg_resources.Requirement.parse()` (e.g.,
    `idontevenknow<1.6,>1.9,!=1.9.6,<2.0a0,==2.4c1`). (See URL below.)

    Implementation
    ----------
    * If no `version` was passed and a `setuptools` distribution matching
      the requirements is found, `setuptools` machinery validates the
      requirements, including any extras.
    * Otherwise only versions are validated manually:
        1. The requirements are parsed with
           `pkg_resources.Requirement.parse()`.
        2. The version to test is the passed `version` parameter if it is
           non-`None`; else a subprocess importing this module is spawned
           and the value of its version attribute is used. The name of that
           attribute defaults to `__version__` but may be configured with
           the passed `version_attr` parameter.
        3. If that version is empty, `False` is returned; otherwise the
           requirements are validated against it.

    Note that `setuptools` is generally considered to be the most robust means
    of comparing version strings in Python. The alternative `LooseVersion()`
    and `StrictVersion()` functions provided by the standard
    `distutils.version` module fail for common edge cases: e.g.,

        >>> from distutils.version import LooseVersion
        >>> LooseVersion('1.5') >= LooseVersion('1.5-rc2')
        False
        >>> from pkg_resources import parse_version
        >>> parse_version('1.5') >= parse_version('1.5-rc2')
        True

    Parameters
    ----------
    requirements : str
        Requirements in `pkg_resources.Requirement.parse()` format.
    version : str
        Optional PEP 0440-compliant version (e.g., `3.14-rc5`) to be used
        _instead_ of the current version of this module. If non-`None`, this
        function ignores all `setuptools` distributions for this module and
        instead compares this version against the version embedded in the
        passed requirements. This ignores the module name embedded in the
        passed requirements, permitting arbitrary versions to be compared in a
        robust manner. (See examples below.)
    version_attr : str
        Optional name of the version attribute defined by this module,
        defaulting to `__version__`. If a `setuptools` distribution exists for
        this module (there usually does) _and_ the `version` parameter is
        `None` (it usually is), this parameter is ignored.

    Returns
    ----------
    bool
        Boolean result of the desired validation.

    Raises
    ----------
    AttributeError
        If no `setuptools` distribution exists for this module _and_ this
        module defines no attribute whose name is the passed
        `version_attr` parameter.
    ValueError
        If the passed specification does _not_ comply with
        `pkg_resources.Requirement` syntax.

    See Also
    ----------
    https://pythonhosted.org/setuptools/pkg_resources.html#id12
        `pkg_resources.Requirement` syntax details.

    Examples
    ----------
        # Assume PIL 2.9.0, Sphinx 1.3.1, and SQLAlchemy 0.6 are all installed.
        >>> from PyInstaller.utils.hooks import is_module_satisfies
        >>> is_module_satisfies('sphinx >= 1.3.1')
        True
        >>> is_module_satisfies('sqlalchemy != 0.6')
        False

        # Compare two arbitrary versions. In this case, the module name
        # "sqlalchemy" is simply ignored.
        >>> is_module_satisfies('sqlalchemy != 0.6', version='0.5')
        True

        # Since the "pillow" project providing PIL publishes its version via
        # the custom "PILLOW_VERSION" attribute (rather than the standard
        # "__version__" attribute), an attribute name is passed as a fallback
        # to validate PIL when not installed by setuptools. As PIL is usually
        # installed by setuptools, this optional parameter is usually ignored.
        >>> is_module_satisfies('PIL == 2.9.0', version_attr='PILLOW_VERSION')
        True
    """
    version_was_passed = version is not None

    # Without an explicit version, let setuptools decide if it knows a
    # distribution for these requirements. This also supports non-version
    # validation (e.g., of "["- and "]"-delimited extras).
    if not version_was_passed:
        try:
            pkg_resources.get_distribution(requirements)
            # An existing distribution satisfies these requirements.
            return True
        except pkg_resources.DistributionNotFound:
            # No such distribution: fall back to the manual check below.
            pass
        except (pkg_resources.UnknownExtra, pkg_resources.VersionConflict):
            # All existing distributions violate these requirements.
            return False

    # Either a version was passed or setuptools knows no distribution for
    # this module: parse the requirements for a manual version comparison.
    parsed = pkg_resources.Requirement.parse(requirements)

    if not version_was_passed:
        # Ask the module itself, in a subprocess, for its version attribute.
        version = get_module_attribute(parsed.project_name, version_attr)

    # An empty version means the module does not exist in the system.
    return bool(version) and version in parsed
501
502
def is_package(module_name):
    """
    Tell whether a Python module is a package containing other modules, as
    opposed to a plain module.

    The module itself is not imported; only its loader is consulted.

    :param module_name: Module name to check.
    :return: True if module is a package, False otherwise.
    """
    try:
        loader = pkgutil.find_loader(module_name)
    except Exception:
        # Failing to find a loader probably means the name points to a class
        # or function, so it is not a package.
        return False

    if not loader:
        # No loader at all - the module is probably not a package.
        return False
    return loader.is_package(module_name)
525
526
def get_package_paths(package):
    """
    Return a ``(pkg_base, pkg_dir)`` tuple for the given package.

    ``pkg_dir`` is the directory holding the package itself and ``pkg_base``
    is that directory with the package's own relative path removed from its
    end. For example, if pkg.subpkg lives in /abs/path/to/python/libs, the
    result is (/abs/path/to/python/libs,
               /abs/path/to/python/libs/pkg/subpkg).
    """
    # package.__file__ = /abs/path/to/package/subpackage/__init__.py, so its
    # directory is where the package's files are found.
    pkg_dir = os.path.dirname(get_module_file_attribute(package))
    # Dotted name turned into a relative path, e.g. 'pkg.subpkg' becomes
    # 'pkg/subpkg' (using os.sep); stripping it leaves the base directory.
    relative_pkg_dir = package.replace('.', os.sep)
    return remove_suffix(pkg_dir, relative_pkg_dir), pkg_dir
546
547
def collect_submodules(package, filter=lambda name: True):
    """
    :param package: A string which names the package which will be searched
        for submodules.
    :param filter: A function selecting which of the submodules found should
        be included in the returned list. It takes one argument, a string,
        which gives the name of a submodule. The submodule is added to the
        result only if the function returns true. For example,
        ``filter=lambda name: 'test' not in name`` will return modules that
        don't contain the word ``test``.
    :return: A list of strings which specify all the modules in package,
        including the package itself. Its results can be directly assigned
        to ``hiddenimports`` in a hook script; see, for example,
        ``hook-sphinx.py``.

    Raises ``ValueError`` if ``package`` is not a string. Returns an empty
    list if ``package`` is not a package.

    This function is used only for hook scripts, but not by the body of
    PyInstaller.
    """
    # Accept only strings as packages.
    if not isinstance(package, string_types):
        raise ValueError

    logger.debug('Collecting submodules for %s' % package)
    if not is_package(package):
        # A plain module has no submodules to collect.
        logger.debug('collect_submodules - Module %s is not a package.' % package)
        return []

    # Only the package directory is needed for walking.
    _, pkg_dir = get_package_paths(package)

    # Walking the package performs imports, so it is done in a separate
    # process. repr() is used on the directory to escape Windows backslashes.
    walker_script = """
        import sys
        import pkgutil

        # ``pkgutil.walk_packages`` doesn't walk subpackages of zipped files
        # per https://bugs.python.org/issue14209. This is a workaround.
        def walk_packages(path=None, prefix='', onerror=None):
            def seen(p, m={{}}):
                if p in m:
                    return True
                m[p] = True

            for importer, name, ispkg in pkgutil.iter_modules(path, prefix):
                if not name.startswith(prefix):   ## Added
                    name = prefix + name          ## Added
                yield importer, name, ispkg

                if ispkg:
                    try:
                        __import__(name)
                    except ImportError:
                        if onerror is not None:
                            onerror(name)
                    except Exception:
                        if onerror is not None:
                            onerror(name)
                        else:
                            raise
                    else:
                        path = getattr(sys.modules[name], '__path__', None) or []

                        # don't traverse path items we've seen before
                        path = [p for p in path if not seen(p)]

                        ## Use Py2 code here. It still works in Py3.
                        for item in walk_packages(path, name+'.', onerror):
                            yield item
                        ## This is the original Py3 code.
                        #yield from walk_packages(path, name+'.', onerror)

        for module_loader, name, ispkg in walk_packages([{}], '{}.'):
            print(name)
        """.format(repr(pkg_dir), package)
    names = exec_statement(walker_script)

    # The package itself is always part of the result; the submodules
    # printed by the subprocess are added if the filter accepts them.
    mods = {package}
    mods.update([name for name in names.split() if filter(name)])

    logger.debug("collect_submodules - Found submodules: %s", mods)
    return list(mods)
635
636
def is_module_or_submodule(name, mod_or_submod):
    """
    Return ``True`` if ``name`` is ``mod_or_submod`` itself or one of its
    submodules.

    Intended for the ``filter`` argument of ``collect_submodules``. For
    example, ``collect_submodules('foo', lambda name: not
    is_module_or_submodule(name, 'foo.test'))`` excludes ``foo.test`` and
    ``foo.test.one`` but not ``foo.testifier``.
    """
    # The trailing dot keeps look-alike names such as 'foo.testifier' from
    # matching 'foo.test'.
    if name.startswith(mod_or_submod + '.'):
        return True
    return name == mod_or_submod
647
648
# Patterns of dynamic library filenames that might be bundled with some
# installed Python packages. `collect_dynamic_libs()` globs each of these
# patterns in every directory of a package.
PY_DYLIB_PATTERNS = [
    '*.dll',
    '*.dylib',
    'lib*.so',
]
656
657
def collect_dynamic_libs(package, destdir=None):
    """
    Produce a list of (source, dest) tuples for the dynamic library files
    which reside in package. The result can be directly assigned to
    ``binaries`` in a hook script. The package parameter must be a string
    which names the package; otherwise ``ValueError`` is raised.

    :param destdir: Relative path to ./dist/APPNAME where the libraries
                    should be put. If not given, the directory hierarchy of
                    the original package is preserved.
    """
    # Accept only strings as packages.
    if not isinstance(package, string_types):
        raise ValueError

    logger.debug('Collecting dynamic libraries for %s' % package)
    pkg_base, pkg_dir = get_package_paths(package)
    # Leading part of every directory path that is dropped to obtain the
    # destination directory inside the bundle.
    strip_prefix = os.path.dirname(pkg_base) + os.sep

    dylibs = []
    for dirpath, _dirnames, _filenames in os.walk(pkg_dir):
        # Every library found in this directory shares one destination:
        # either the caller's destdir or the package-relative directory,
        # i.e. (/abs/path/to/source/mod/submod/file.pyd, mod/submod).
        dest = destdir if destdir else remove_prefix(dirpath, strip_prefix)
        for pattern in PY_DYLIB_PATTERNS:
            for source in glob.glob(os.path.join(dirpath, pattern)):
                logger.debug(' %s, %s' % (source, dest))
                dylibs.append((source, dest))
    return dylibs
693
694
def collect_data_files(package, include_py_files=False, subdir=None):
    """
    Produce a list of (source, dest) tuples for the non-Python (i.e. data)
    files which reside in package. The result can be directly assigned to
    ``datas`` in a hook script; see, for example, hook-sphinx.py. The
    package parameter must be a string which names the package; otherwise
    ``ValueError`` is raised.

    By default, all Python executable files (those ending in .py, .pyc,
    and so on) will NOT be collected; setting the include_py_files
    argument to True collects these files as well. This is typically used
    with Python routines (such as those in pkgutil) that search a given
    directory for Python executable files then load them as extensions or
    plugins. The optional subdir gives a subdirectory relative to package to
    search, which is helpful when submodules are imported at run-time from a
    directory lacking __init__.py

    This function does not work on zipped Python eggs.

    This function is used only for hook scripts, but not by the body of
    PyInstaller.
    """
    logger.debug('Collecting data files for %s' % package)

    # Accept only strings as packages.
    if not isinstance(package, string_types):
        raise ValueError

    pkg_base, pkg_dir = get_package_paths(package)
    if subdir:
        pkg_dir = os.path.join(pkg_dir, subdir)
    # Leading part of every directory path that is dropped to obtain the
    # destination directory inside the bundle.
    strip_prefix = os.path.dirname(pkg_base) + os.sep

    datas = []
    for dirpath, _dirnames, filenames in os.walk(pkg_dir):
        # All files of one directory share the same destination, giving
        # tuples like (/abs/path/to/source/mod/submod/file.dat, mod/submod).
        dest = remove_prefix(dirpath, strip_prefix)
        datas.extend(
            (os.path.join(dirpath, fname), dest)
            for fname in filenames
            if include_py_files
            or os.path.splitext(fname)[1] not in PY_IGNORE_EXTENSIONS)

    logger.debug("collect_data_files - Found files: %s", datas)
    return datas
740
741
def collect_system_data_files(path, destdir=None, include_py_files=False):
    """
    Produce a list of (source, dest) tuples for the non-Python (i.e. data)
    files which reside somewhere on the system below ``path``. The result
    can be directly assigned to ``datas`` in a hook script.

    ``path`` must be a string; otherwise ``ValueError`` is raised. If
    ``destdir`` is given, it is prepended to every destination directory.
    Files whose extension is in ``PY_IGNORE_EXTENSIONS`` are skipped unless
    ``include_py_files`` is true.

    This function is used only for hook scripts, but not by the body of
    PyInstaller.
    """
    # Accept only strings as paths.
    if not isinstance(path, string_types):
        raise ValueError
    # ``remove_prefix`` below assumes ``os.sep`` as the path separator, which
    # may not hold on Windows, where Linux path separators are also allowed
    # in filenames. Normalizing the path fixes this.
    path = os.path.normpath(path)
    strip_prefix = os.path.dirname(path) + os.sep

    datas = []
    for dirpath, _dirnames, filenames in os.walk(path):
        # All files of one directory share the same destination, giving
        # tuples like (/abs/path/to/source/mod/submod/file.dat,
        #              destdir/mod/submod).
        dest = remove_prefix(dirpath, strip_prefix)
        if destdir is not None:
            dest = os.path.join(destdir, dest)
        datas.extend(
            (os.path.join(dirpath, fname), dest)
            for fname in filenames
            if include_py_files
            or os.path.splitext(fname)[1] not in PY_IGNORE_EXTENSIONS)

    return datas
776
777
def _find_prefix(filename):
    """
    In virtualenv, _CONFIG_H and _MAKEFILE may have same or different
    prefixes, depending on the version of virtualenv.
    Try to find the correct one, which is assumed to be the longest one.

    :param filename: Path of the file whose prefix is wanted.
    :return: ``sys.prefix`` when not running inside a virtual environment.
        Otherwise the longest of ``sys.prefix`` and ``base_prefix`` that
        contains ``filename``, or ``sys.prefix`` if neither contains it.
    """
    if not is_venv:
        return sys.prefix
    filename = os.path.abspath(filename)
    candidates = []
    for prefix in (os.path.abspath(sys.prefix), base_prefix):
        # Match on a path-component boundary. A character-wise common prefix
        # would wrongly accept ``/opt/venv`` for a file that lives under
        # ``/opt/venv2``.
        if filename == prefix or \
                filename.startswith(os.path.join(prefix, '')):
            candidates.append(prefix)
    if not candidates:
        # The file is outside both prefixes; fall back to the same value
        # used when no virtual environment is active instead of failing
        # with an IndexError.
        return sys.prefix
    return max(candidates, key=len)
795
796
def relpath_to_config_or_make(filename):
    """
    Return the directory containing ``filename``, expressed relative to the
    prefix that ``_find_prefix`` selects for that file.

    Shared by hook-sysconfig and hook-distutils, both of which need to
    generate "datas" tuples for pyconfig.h and Makefile under the same
    conditions.
    """
    base = _find_prefix(filename)
    containing_dir = os.path.dirname(filename)
    # Relative path in the dist directory.
    return os.path.relpath(containing_dir, start=base)
807
808
def copy_metadata(package_name):
    """
    This function returns a list to be assigned to the ``datas`` global
    variable. This list instructs PyInstaller to copy the metadata for the
    given package to PyInstaller's data directory.

    Parameters
    ----------
    package_name : str
        Specifies the name of the package for which metadata should be copied.

    Returns
    -------
    list
        A one-element list holding the tuple ``(metadata_dir, dest_dir)``.
        This should be assigned to ``datas``.

    Raises
    ------
    AssertionError
        If the metadata directory determined for the distribution does not
        exist. Exceptions raised by ``pkg_resources.get_distribution``
        propagate unchanged.

    Examples
    --------
        >>> from PyInstaller.utils.hooks import copy_metadata
        >>> copy_metadata('sphinx')
        [('c:\\python27\\lib\\site-packages\\Sphinx-1.3.2.dist-info',
          'Sphinx-1.3.2.dist-info')]
    """

    # Some notes: to look at the metadata locations for all installed
    # packages::
    #
    #     for key, value in pkg_resources.working_set.by_key.items():
    #         print('{}: {}'.format(key, value.egg_info))
    #
    # Looking at this output, I see four general types of packages:
    #
    # 1. ``pypubsub: c:\python27\lib\site-packages\pypubsub-3.3.0-py2.7.egg\EGG-INFO``
    # 2. ``codechat: c:\users\bjones\documents\documentation\CodeChat.egg-info``
    # 3. ``zest.releaser: c:\python27\lib\site-packages\zest.releaser-6.2.dist-info``
    # 4. ``pyserial: None``
    #
    # The first item shows that some metadata will be nested inside an egg. I
    # assume we'll have to deal with zipped eggs, but I don't have any examples
    # handy. The second and third items show different naming conventions for
    # the metadata-containing directory. The fourth item shows a package with no
    # metadata.
    #
    # So, in cases 1-3, copy the metadata directory. In case 4, emit an error
    # -- there's no metadata to copy.
    # See https://pythonhosted.org/setuptools/pkg_resources.html#getting-or-creating-distributions.
    # Unfortunately, there's no documentation on the ``egg_info`` attribute; it
    # was found through trial and error.
    dist = pkg_resources.get_distribution(package_name)
    metadata_dir = dist.egg_info
    # Determine a destination directory based on the standardized egg name for
    # this distribution. This avoids some problems discussed in
    # https://github.com/pyinstaller/pyinstaller/issues/1888.
    dest_dir = '{}.egg-info'.format(dist.egg_name())
    # Per https://github.com/pyinstaller/pyinstaller/issues/1888, ``egg_info``
    # isn't always defined. Try a workaround based on a suggestion by
    # @benoit-pierre in that issue.
    if metadata_dir is None:
        # We assume that this is an egg, so guess a name based on `egg_name()
        # <https://pythonhosted.org/setuptools/pkg_resources.html#distribution-methods>`_.
        metadata_dir = os.path.join(dist.location, dest_dir)

    # Raise explicitly instead of using ``assert``: an ``assert`` statement is
    # removed when Python runs with ``-O``, which would let a non-existent
    # path be returned. The exception type is kept so existing callers see
    # the same error as before.
    if not os.path.exists(metadata_dir):
        raise AssertionError(
            'Metadata directory {!r} for package {} does not exist.'.format(
                metadata_dir, package_name))
    logger.debug('Package %s metadata found in %s belongs in %s',
                 package_name, metadata_dir, dest_dir)

    return [(metadata_dir, dest_dir)]
876
877
def get_installer(module):
    """
    Try to find which package manager installed a module.

    The first line of the ``INSTALLER`` file in the metadata directory of
    the distribution found for the module is used when available. On macOS,
    MacPorts and Homebrew are probed afterwards.

    :param module: Module to check
    :return: Package manager or None
    """
    file_name = get_module_file_attribute(module)

    # Locate the distribution that provides the module. A module living
    # outside any ``site-packages`` directory, or one without a matching
    # distribution, simply has no metadata to inspect.
    package = None
    marker = 'site-packages'
    marker_pos = file_name.find(marker)
    if marker_pos != -1:
        site_dir = file_name[:marker_pos + len(marker)]
        # This is necessary for situations where the project name and module name don't match, i.e.
        # Project name: pyenchant Module name: enchant
        module_key = module.lower()
        for pkg in pkg_resources.find_distributions(site_dir):
            if module_key in pkg.key:
                package = pkg
                break

    if package is not None:
        metadata_dir = copy_metadata(package)[0][0]
        # Check for an INSTALLER file in the metadata_dir and return the first
        # line which should be the program that installed the module.
        installer_file = os.path.join(metadata_dir, 'INSTALLER')
        if os.path.isdir(metadata_dir) and os.path.exists(installer_file):
            with open_file(installer_file, 'r') as installer_file_object:
                # ``readline`` returns '' for an empty file, so no indexing
                # into a possibly empty list of lines is needed.
                first_line = installer_file_object.readline()
            installer = first_line.rstrip('\r\n')
            if installer:
                logger.debug(
                    "Found installer: '%s' for module: '%s' from package: "
                    "'%s'", installer, module, package)
                return installer

    if is_darwin:
        try:
            output = exec_command_stdout('port', 'provides', file_name)
            if 'is provided by' in output:
                logger.debug(
                    "Found installer: 'macports' for module: '%s' from "
                    "package: '%s'", module, package)
                return 'macports'
        except ExecCommandFailed:
            pass
        real_path = os.path.realpath(file_name)
        if 'Cellar' in real_path:
            logger.debug(
                "Found installer: 'homebrew' for module: '%s' from "
                "package: '%s'", module, package)
            return 'homebrew'
    return None
923
924
925# ``_map_distribution_to_packages`` is expensive. Compute it when used, then
926# return the memoized value. This is a simple alternative to
927# ``functools.lru_cache``.
928def _memoize(f):
929    memo = []
930
931    def helper():
932        if not memo:
933            memo.append(f())
934        return memo[0]
935
936    return helper
937
938
# Walk through every package, determining which distribution it is in.
@_memoize
def _map_distribution_to_packages():
    """
    Build a mapping from distribution key to the ``sys.path`` directory
    entries that ``pkg_resources.get_distribution`` resolves to that
    distribution.

    :return: Dict mapping ``dist.key`` to a list of directory entry names.
    """
    logger.info('Determining a mapping of distributions to packages...')
    dist_to_packages = {}
    for p in sys.path:
        # The path entry ``''`` refers to the current directory.
        if not p:
            p = '.'
        # Ignore any entries in ``sys.path`` that can't be listed (missing,
        # not a directory, unreadable, or not a usable path at all).
        try:
            lds = os.listdir(p)
        except (OSError, TypeError, ValueError):
            continue
        for ld in lds:
            # Not all packages belong to a distribution. Skip these. Arbitrary
            # file names are handed to the requirement parser here, which
            # fails in several different ways, hence the broad handler.
            # Unlike a bare ``except:`` it lets ``KeyboardInterrupt`` and
            # ``SystemExit`` through.
            try:
                dist = pkg_resources.get_distribution(ld)
            except Exception:
                continue
            dist_to_packages.setdefault(dist.key, []).append(ld)

    return dist_to_packages
964
965
def requirements_for_package(package_name):
    """
    Given a ``package_name`` as a string, return a list of packages needed to
    satisfy its requirements. The result can be assigned directly to
    ``hiddenimports``.

    A requirement with no entry in the distribution-to-packages mapping is
    reported with a warning and contributes nothing to the result.
    """
    packages_by_dist = _map_distribution_to_packages()
    declared = pkg_resources.get_distribution(package_name).requires()

    hiddenimports = []
    for req in declared:
        if req.key not in packages_by_dist:
            logger.warning('Unable to find package for requirement %s from '
                           'package %s.',
                           req.project_name, package_name)
            continue
        hiddenimports.extend(packages_by_dist[req.key])

    logger.info('Packages required by %s:\n%s', package_name, hiddenimports)
    return hiddenimports
984
985
def collect_all(package_name, include_py_files=True):
    """
    Given a package name as a string, return a tuple of ``datas, binaries,
    hiddenimports`` containing all data files, binaries, and modules in the
    given package. The value of ``include_py_files`` is passed directly to
    ``collect_data_files``.

    Failures while copying metadata or determining requirements are logged
    as warnings and do not abort the collection.

    Typical use:
    ``datas, binaries, hiddenimports = collect_all('my_module_name')``.
    """
    # Metadata first, then the package's own data files.
    data_files = []
    try:
        data_files.extend(copy_metadata(package_name))
    except Exception as exc:
        logger.warning('Unable to copy metadata for %s: %s', package_name, exc)
    data_files.extend(collect_data_files(package_name, include_py_files))

    libs = collect_dynamic_libs(package_name)

    # Submodules of the package, followed by whatever its requirements need.
    modules = collect_submodules(package_name)
    try:
        modules += requirements_for_package(package_name)
    except Exception as exc:
        logger.warning('Unable to determine requirements for %s: %s',
                       package_name, exc)

    return data_files, libs, modules
1008
1009
1010# These imports need to be here due to these modules recursively importing this module.
1011from .django import *
1012from .gi import *
1013from .qt import *
1014from .win32 import *
1015