1# -*- coding: utf-8 -*-
2#-----------------------------------------------------------------------------
3# Copyright (c) 2005-2019, PyInstaller Development Team.
4#
5# Distributed under the terms of the GNU General Public License with exception
6# for distributing bootloader.
7#
8# The full license is in the file COPYING.txt, distributed with this software.
9#-----------------------------------------------------------------------------
10
11
12"""
13Utility functions related to analyzing/bundling dependencies.
14"""
15
16import ctypes
17import ctypes.util
18import dis
19import io
20import marshal
21import os
22import re
23import struct
24import zipfile
25
26from ..lib.modulegraph import util, modulegraph
27
28from .. import compat
29from ..compat import (is_darwin, is_unix, is_freebsd, is_py2, is_py37,
30                      BYTECODE_MAGIC, PY3_BASE_MODULES,
31                      exec_python_rc)
32from .dylib import include_library
33from .. import log as logging
34
35logger = logging.getLogger(__name__)
36
37
38# TODO find out if modules from base_library.zip could be somehow bundled into the .exe file.
def create_py3_base_library(libzip_filename, graph):
    """
    Package basic Python modules into .zip file. The .zip file with basic
    modules is necessary to have on PYTHONPATH for initializing libpython3
    in order to run the frozen executable with Python 3.

    :param libzip_filename: Path of the .zip archive to create. A file left
        at this path by a previous run is removed first.
    :param graph: Module graph; only its ``flatten()`` method is used. Only
        nodes whose type is exactly ``modulegraph.SourceModule`` or
        ``modulegraph.Package`` and whose identifier is one of
        ``PY3_BASE_MODULES`` (or a submodule of one) are bundled.
    :raises Exception: Any error raised while building the archive is
        logged and then re-raised unchanged.
    """
    # Construct regular expression for matching modules that should be bundled
    # into base_library.zip.
    # Excluded are plain 'modules' or 'submodules.ANY_NAME'.
    # The match has to be exact - start and end of string not substring.
    regex_modules = '|'.join([r'(^%s$)' % x for x in PY3_BASE_MODULES])
    regex_submod = '|'.join([r'(^%s\..*$)' % x for x in PY3_BASE_MODULES])
    regex_str = regex_modules + '|' + regex_submod
    module_filter = re.compile(regex_str)

    try:
        # Remove .zip from previous run.
        if os.path.exists(libzip_filename):
            os.remove(libzip_filename)
        logger.debug('Adding python files to base_library.zip')
        # Class zipfile.PyZipFile is not suitable for PyInstaller needs.
        with zipfile.ZipFile(libzip_filename, mode='w') as zf:
            zf.debug = 3
            for mod in graph.flatten():
                # Exact type comparison is intentional: subclasses of these
                # node types are not bundled.
                if type(mod) not in (modulegraph.SourceModule,
                                     modulegraph.Package):
                    continue
                # Bundling just required modules.
                if not module_filter.match(mod.identifier):
                    continue

                st = os.stat(mod.filename)
                # Both header fields are written as unsigned 32-bit
                # integers, so both are truncated to 32 bits. Without the
                # mask on the timestamp, struct.pack() raises for a negative
                # or too large mtime.
                timestamp = int(st.st_mtime) & 0xFFFFFFFF
                size = st.st_size & 0xFFFFFFFF

                # Name inside a zip archive.
                # TODO use .pyo suffix if optimize flag is enabled.
                base_name = mod.identifier.replace('.', os.sep)
                if type(mod) is modulegraph.Package:
                    new_name = base_name + os.sep + '__init__' + '.pyc'
                else:
                    new_name = base_name + '.pyc'

                # Write code to a file.
                # This code is similar to py_compile.compile().
                with io.BytesIO() as fc:
                    # Prepare all data in byte stream file-like object.
                    fc.write(BYTECODE_MAGIC)
                    if is_py37:
                        # Additional bitfield according to PEP 552
                        # zero means timestamp based
                        fc.write(struct.pack('<I', 0))
                    fc.write(struct.pack('<II', timestamp, size))
                    marshal.dump(mod.code, fc)
                    # Use a ZipInfo to set timestamp for deterministic build
                    info = zipfile.ZipInfo(new_name)
                    zf.writestr(info, fc.getvalue())

    except Exception:
        logger.error('base_library.zip could not be created!')
        raise
94
95
def scan_code_for_ctypes(co):
    """
    Scan a code object for libraries that are loaded through ctypes.

    :param co: Code object to scan. Its ``co_filename`` attribute, when
        present, is only used in warning messages.
    :return: Whatever ``_resolveCtypesImports()`` returns for the set of
        bare library filenames detected in `co`.
    """
    binaries = []

    __recursivly_scan_code_objects_for_ctypes(co, binaries)

    # Only needed for the warning below; fall back to a placeholder if the
    # object passed in does not carry a filename.
    filename = getattr(co, 'co_filename', 'UNKNOWN')

    # If any of the libraries has been requested with anything
    # different than the bare filename, drop that entry and warn
    # the user - pyinstaller would need to patch the compiled pyc
    # file to make it work correctly!
    bare_names = set()
    for binary in set(binaries):
        # 'binary' might be in some cases None. Some Python
        # modules might contain code like the following. For
        # example PyObjC.objc._bridgesupport contain code like
        # that.
        #     dll = ctypes.CDLL(None)
        if not binary:
            # None values have to be dropped too.
            continue
        if binary != os.path.basename(binary):
            # TODO make these warnings show up somewhere.
            logger.warning("Ignoring %s imported from %s - ctypes imports "
                           "are only supported using bare filenames",
                           binary, filename)
            continue
        bare_names.add(binary)

    return _resolveCtypesImports(bare_names)
128
129
def __recursivly_scan_code_objects_for_ctypes(co, binaries):
    """
    Append the ctypes library names detected in `co` to `binaries`.

    `binaries` is extended in place; nothing is returned. The detection
    itself needs to look at more than one bytecode instruction at a time,
    which is why it lives in a separate function.
    """
    # NOTE(review): presumably `util.iterate_instructions` also walks nested
    # code objects (hence "recursivly" in the name) - confirm in modulegraph.
    instruction_stream = util.iterate_instructions(co)
    detected = __scan_code_instruction_for_ctypes(instruction_stream)
    binaries.extend(detected)
137
138
def __scan_code_instruction_for_ctypes(instructions):
    """
    Detects ctypes dependencies, using reasonable heuristics that
    should cover most common ctypes usages.

    This is a generator. `instructions` must be an iterator over objects
    exposing ``opname`` and ``argval`` attributes; it is advanced with
    ``next()`` both here and in the nested helper. For every recognized
    ctypes usage one item is yielded: the library name as a string, or
    ``None`` when a library-loading call was recognized but its argument is
    not a string constant. Callers have to filter out the ``None`` items.
    """
    # Opcodes that load a bare name.
    name_loading_ops = ('LOAD_GLOBAL', 'LOAD_NAME')
    # Opcodes that look up an attribute or a method.
    attr_loading_ops = ('LOAD_ATTR', 'LOAD_METHOD')

    def _libFromConst():
        """Consumes the next instruction and returns the library name if
        that instruction is a LOAD_CONST of a string; returns None otherwise.
        """
        instruction = next(instructions)
        if instruction.opname == 'LOAD_CONST':
            soname = instruction.argval
            if isinstance(soname, str):
                return soname
        return None

    # Every next() below (including the one inside _libFromConst) may
    # exhaust the iterator; that simply ends the scan.
    try:
        while True:
            instruction = next(instructions)

            if not instruction or instruction.opname not in name_loading_ops:
                continue

            name = instruction.argval
            if name == "ctypes":
                # Guesses ctypes has been imported as `import ctypes` and
                # the members are accessed like: ctypes.CDLL("library.so")
                #
                #   LOAD_GLOBAL 0 (ctypes) <--- we "are" here right now
                #   LOAD_ATTR 1 (CDLL)
                #   LOAD_CONST 1 ('library.so')
                #
                # In this case "strip" the `ctypes` by advancing and expecting
                # `LOAD_ATTR` next.
                instruction = next(instructions)
                if instruction.opname not in attr_loading_ops:
                    continue
                name = instruction.argval

            if name in ("CDLL", "WinDLL", "OleDLL", "PyDLL"):
                # Guesses ctypes imports of this type: CDLL("library.so")
                #
                #   LOAD_GLOBAL 0 (CDLL) <--- we "are" here right now
                #   LOAD_CONST 1 ('library.so')

                yield _libFromConst()

            elif name in ("cdll", "windll", "oledll", "pydll"):
                # Guesses ctypes imports of these types:
                #
                #  * cdll.library (only valid on Windows)
                #
                #     LOAD_GLOBAL 0 (cdll) <--- we "are" here right now
                #     LOAD_ATTR 1 (library)
                #
                #  * cdll.LoadLibrary("library.so")
                #
                #     LOAD_GLOBAL   0 (cdll) <--- we "are" here right now
                #     LOAD_ATTR     1 (LoadLibrary)
                #     LOAD_CONST    1 ('library.so')
                instruction = next(instructions)
                if instruction.opname in attr_loading_ops:
                    if instruction.argval == "LoadLibrary":
                        # Second type, needs to fetch one more instruction
                        yield _libFromConst()
                    else:
                        # First type
                        yield instruction.argval + ".dll"

            elif instruction.opname == 'LOAD_ATTR' and name in ("util",):
                # Guesses ctypes imports of these types::
                #
                #  ctypes.util.find_library('gs')
                #
                #     LOAD_GLOBAL   0 (ctypes)
                #     LOAD_ATTR     1 (util) <--- we "are" here right now
                #     LOAD_ATTR     1 (find_library)
                #     LOAD_CONST    1 ('gs')
                instruction = next(instructions)
                if instruction.opname in attr_loading_ops:
                    if instruction.argval == "find_library":
                        libname = _libFromConst()
                        if libname:
                            lib = ctypes.util.find_library(libname)
                            if lib:
                                # On Windows, `find_library` may return
                                # a full pathname. See issue #1934
                                yield os.path.basename(lib)
    except StopIteration:
        return
232
233
234# TODO Reuse this code with modulegraph implementation
def _resolveCtypesImports(cbinaries):
    """
    Completes ctypes BINARY entries for modules with their full path.

    Input is an iterable of c-binary-names (as found by
    `__scan_code_instruction_for_ctypes`). Output is a list of tuples
    ready to be appended to the ``binaries`` of a module.

    This function temporarily extends PATH, LD_LIBRARY_PATH or
    DYLD_LIBRARY_PATH (depending on the platform) by CONF['pathex']
    so shared libs will be searched there, too. The original value is
    restored before returning, also when an exception is raised.

    Example:
    >>> _resolveCtypesImports(['libgs.so'])
    [('libgs.so', '/usr/lib/libgs.so', 'BINARY')]

    """
    from ctypes.util import find_library
    from ..config import CONF

    if is_unix:
        envvar = "LD_LIBRARY_PATH"
    elif is_darwin:
        envvar = "DYLD_LIBRARY_PATH"
    else:
        envvar = "PATH"

    def _setPaths():
        # Prepend CONF['pathex'] to the search path variable and return the
        # previous value (None if the variable was not set).
        path = os.pathsep.join(CONF['pathex'])
        old = compat.getenv(envvar)
        if old is not None:
            path = os.pathsep.join((path, old))
        compat.setenv(envvar, path)
        return old

    def _restorePaths(old):
        # Undo _setPaths(): remove the variable if it did not exist before.
        if old is None:
            compat.unsetenv(envvar)
        else:
            compat.setenv(envvar, old)

    ret = []

    # Try to locate the shared library on disk. This is done by
    # executing ctypes.util.find_library with CONF['pathex'] prepended
    # to the library search paths, then restoring the original value.
    old = _setPaths()
    try:
        for cbin in cbinaries:
            cpath = find_library(os.path.splitext(cbin)[0])
            if is_unix:
                # CAVEAT: find_library() is not the correct function. Ctype's
                # documentation says that it is meant to resolve only the
                # filename (as a *compiler* does) not the full path. Anyway,
                # it works well enough on Windows and Mac. On Linux, we need
                # to implement more code to find out the full path.
                if cpath is None:
                    cpath = cbin
                # "man ld.so" says that we should first search LD_LIBRARY_PATH
                # and then the ldcache
                for d in compat.getenv(envvar, '').split(os.pathsep):
                    if os.path.isfile(os.path.join(d, cpath)):
                        cpath = os.path.join(d, cpath)
                        break
                else:
                    # Not found in any search path directory; fall back to
                    # the (lazily loaded) ldconfig cache.
                    if LDCONFIG_CACHE is None:
                        load_ldconfig_cache()
                    if cpath in LDCONFIG_CACHE:
                        cpath = LDCONFIG_CACHE[cpath]
                        assert os.path.isfile(cpath)
                    else:
                        cpath = None
            if cpath is None:
                # Skip warning message if cbin (basename of library) is
                # ignored. This prevents messages like:
                # 'W: library kernel32.dll required via ctypes not found'
                if not include_library(cbin):
                    continue
                logger.warning("library %s required via ctypes not found",
                               cbin)
            else:
                if not include_library(cpath):
                    continue
                ret.append((cbin, cpath, "BINARY"))
    finally:
        # Never leave the process environment modified, even if resolving
        # one of the libraries failed with an exception.
        _restorePaths(old)
    return ret
319
320
# Cache of the parsed `ldconfig` output: maps library name to library path.
# None means the cache has not been loaded yet.
LDCONFIG_CACHE = None


def load_ldconfig_cache():
    """
    Create a cache of the `ldconfig`-output to call it only once.
    It contains thousands of libraries and running it on every dynlib
    is expensive.

    The result is stored in the module-level ``LDCONFIG_CACHE`` dict, which
    maps a library name to the path reported by `ldconfig`. Calling this
    function again once the cache is loaded does nothing. If no `ldconfig`
    executable can be found, the cache is set to an empty dict.
    """
    global LDCONFIG_CACHE

    if LDCONFIG_CACHE is not None:
        return

    from distutils.spawn import find_executable
    ldconfig = find_executable('ldconfig')
    if ldconfig is None:
        # If `ldconfig` is not found in $PATH, search it in some fixed
        # directories. Simply use a second call instead of fiddling
        # around with checks for empty env-vars and string-concat.
        ldconfig = find_executable('ldconfig',
                                   '/usr/sbin:/sbin:/usr/bin:/usr/sbin')

        # if we still couldn't find 'ldconfig' command
        if ldconfig is None:
            LDCONFIG_CACHE = {}
            return

    if is_freebsd:
        # This has a quite different format than other Unixes
        # [vagrant@freebsd-10 ~]$ ldconfig -r
        # /var/run/ld-elf.so.hints:
        #     search directories: /lib:/usr/lib:/usr/lib/compat:...
        #     0:-lgeom.5 => /lib/libgeom.so.5
        #   184:-lpython2.7.1 => /usr/local/lib/libpython2.7.so.1
        text = compat.exec_command(ldconfig, '-r')
        text = text.strip().splitlines()[2:]
        pattern = re.compile(r'^\s+\d+:-l(\S+)(\s.*)? => (\S+)')
    else:
        # Skip first line of the library list because it is just
        # an informative line and might contain localized characters.
        # Example of first line with local cs_CZ.UTF-8:
        #$ /sbin/ldconfig -p
        #V keši „/etc/ld.so.cache“ nalezeno knihoven: 2799
        #      libzvbi.so.0 (libc6,x86-64) => /lib64/libzvbi.so.0
        #      libzvbi-chains.so.0 (libc6,x86-64) => /lib64/libzvbi-chains.so.0
        text = compat.exec_command(ldconfig, '-p')
        text = text.strip().splitlines()[1:]
        pattern = re.compile(r'^\s+(\S+)(\s.*)? => (\S+)')

    LDCONFIG_CACHE = {}
    for line in text:
        # :fixme: this assumes library names do not contain whitespace
        m = pattern.match(line)
        if m is None:
            # Not a library entry. Some ldconfig versions append an
            # informational "Cache generated by: ..." line, which is
            # harmless; anything else is reported and then skipped as well.
            if 'Cache generated by:' not in line:
                logger.warning(
                    "Unrecognised line of output %r from ldconfig", line)
            continue
        path = m.groups()[-1]
        if is_freebsd:
            # Insert `.so` at the end of the lib's basename. soname
            # and filename may have (different) trailing versions. We
            # assume the `.so` in the filename to mark the end of the
            # lib's basename.
            bname = os.path.basename(path).split('.so', 1)[0]
            name = 'lib' + m.group(1)
            assert name.startswith(bname)
            name = bname + '.so' + name[len(bname):]
        else:
            name = m.group(1)
        # ldconfig may know about several versions of the same lib,
        # e.g. different arch, different libc, etc. Use the first
        # entry.
        LDCONFIG_CACHE.setdefault(name, path)
392
393
def get_path_to_egg(path):
    """
    Return the path of the python egg that `path` belongs to.

    Starting with `path` itself and then walking up through its parent
    directories, the first component that has an ``.egg`` extension (case
    insensitive) and exists as a file or directory is returned.
    Return `None` if there is no such component.
    """
    # This assumes, eggs are not nested.
    # TODO add support for new .whl packages.
    previous = None  # os.path.dirname() stops changing at the top
    candidate = path
    while candidate and candidate != previous:
        extension = os.path.splitext(candidate)[1]
        is_egg_name = extension.lower() == ".egg"
        if is_egg_name and (os.path.isfile(candidate)
                            or os.path.isdir(candidate)):
            return candidate
        previous, candidate = candidate, os.path.dirname(candidate)
    return None
410
411
def is_path_to_egg(path):
    """
    Tell whether `path` points into a python egg (or is an egg itself).

    Returns True exactly when `get_path_to_egg(path)` finds an egg.
    """
    egg_path = get_path_to_egg(path)
    return egg_path is not None
418