# -*- coding: utf-8 -*-
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2019, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License with exception
# for distributing bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------


"""
Utility functions related to analyzing/bundling dependencies.
"""

import ctypes
import ctypes.util
import dis
import io
import marshal
import os
import re
import struct
import zipfile

from ..lib.modulegraph import util, modulegraph

from .. import compat
from ..compat import (is_darwin, is_unix, is_freebsd, is_py2, is_py37,
                      BYTECODE_MAGIC, PY3_BASE_MODULES,
                      exec_python_rc)
from .dylib import include_library
from .. import log as logging

logger = logging.getLogger(__name__)


# TODO find out if modules from base_library.zip could be somehow bundled into the .exe file.
def create_py3_base_library(libzip_filename, graph):
    """
    Package basic Python modules into .zip file. The .zip file with basic
    modules is necessary to have on PYTHONPATH for initializing libpython3
    in order to run the frozen executable with Python 3.

    :param libzip_filename: Path of the archive to (re)create. An existing
        file at that path is removed first.
    :param graph: Module graph; every node returned by ``graph.flatten()``
        that is exactly a ``SourceModule`` or ``Package`` and whose
        identifier matches ``PY3_BASE_MODULES`` is written as a ``.pyc``.
    :raises Exception: Any error is logged and then re-raised unchanged.
    """
    # Construct regular expression for matching modules that should be bundled
    # into base_library.zip.
    # Excluded are plain 'modules' or 'submodules.ANY_NAME'.
    # The match has to be exact - start and end of string not substring.
    regex_modules = '|'.join([r'(^%s$)' % x for x in PY3_BASE_MODULES])
    regex_submod = '|'.join([r'(^%s\..*$)' % x for x in PY3_BASE_MODULES])
    regex_str = regex_modules + '|' + regex_submod
    module_filter = re.compile(regex_str)

    try:
        # Remove .zip from previous run.
        if os.path.exists(libzip_filename):
            os.remove(libzip_filename)
        logger.debug('Adding python files to base_library.zip')
        # Class zipfile.PyZipFile is not suitable for PyInstaller needs.
        with zipfile.ZipFile(libzip_filename, mode='w') as zf:
            zf.debug = 3
            for mod in graph.flatten():
                # Exact type comparison (not isinstance): subclasses of these
                # node types are intentionally left out.
                if type(mod) not in (modulegraph.SourceModule,
                                     modulegraph.Package):
                    continue
                # Bundling just required modules.
                if not module_filter.match(mod.identifier):
                    continue

                st = os.stat(mod.filename)
                # Both header fields are unsigned 32-bit; mask them so that
                # struct.pack() cannot fail on out-of-range values.
                timestamp = int(st.st_mtime) & 0xFFFFFFFF
                size = st.st_size & 0xFFFFFFFF
                # Name inside a zip archive.
                # TODO use .pyo suffix if optimize flag is enabled.
                if type(mod) is modulegraph.Package:
                    new_name = (mod.identifier.replace('.', os.sep) +
                                os.sep + '__init__' + '.pyc')
                else:
                    new_name = mod.identifier.replace('.', os.sep) + '.pyc'

                # Write code to a file.
                # This code is similar to py_compile.compile().
                with io.BytesIO() as fc:
                    # Prepare all data in byte stream file-like object.
                    fc.write(BYTECODE_MAGIC)
                    if is_py37:
                        # Additional bitfield according to PEP 552
                        # zero means timestamp based
                        fc.write(struct.pack('<I', 0))
                    fc.write(struct.pack('<II', timestamp, size))
                    marshal.dump(mod.code, fc)
                    # Use a ZipInfo (default, fixed date) instead of the
                    # current time for a deterministic build.
                    info = zipfile.ZipInfo(new_name)
                    zf.writestr(info, fc.getvalue())

    except Exception:
        logger.error('base_library.zip could not be created!')
        raise


def scan_code_for_ctypes(co):
    """
    Scan a code object for libraries loaded through ctypes.

    :param co: Code object to scan.
    :return: The result of `_resolveCtypesImports` for all detected bare
        library filenames, i.e. a list of ``(name, path, 'BINARY')`` tuples.
    """
    binaries = []

    __recursivly_scan_code_objects_for_ctypes(co, binaries)

    # If any of the libraries has been requested with anything
    # different than the bare filename, drop that entry and warn
    # the user - pyinstaller would need to patch the compiled pyc
    # file to make it work correctly!
    binaries = set(binaries)
    for binary in list(binaries):
        # 'binary' might be in some cases None. Some Python
        # modules might contain code like the following. For
        # example PyObjC.objc._bridgesupport contain code like
        # that.
        #     dll = ctypes.CDLL(None)
        if not binary:
            # None values has to be removed too.
            binaries.remove(binary)
        elif binary != os.path.basename(binary):
            # TODO make these warnings show up somewhere.
            filename = getattr(co, 'co_filename', 'UNKNOWN')
            logger.warning("Ignoring %s imported from %s - ctypes imports "
                           "are only supported using bare filenames",
                           binary, filename)
            binaries.remove(binary)

    binaries = _resolveCtypesImports(binaries)
    return binaries


def __recursivly_scan_code_objects_for_ctypes(co, binaries):
    """
    Extend the list `binaries` in place with every library name detected
    in the instructions of code object `co`.
    """
    # ctypes scanning requires a scope wider than one bytecode
    # instruction, so the code resides in a separate function
    # for clarity.
    binaries.extend(
        __scan_code_instruction_for_ctypes(
            util.iterate_instructions(co)))


def __scan_code_instruction_for_ctypes(instructions):
    """
    Detects ctypes dependencies, using reasonable heuristics that
    should cover most common ctypes usages.

    This is a generator. `instructions` must be an iterator of
    instruction objects providing ``opname`` and ``argval``; it is consumed
    while scanning. Each detected library name is yielded; ``None`` is
    yielded where a library call was recognised but its argument was not
    a string constant, so callers have to filter those out.
    """
    expected_ops = ('LOAD_GLOBAL', 'LOAD_NAME')
    load_method = ('LOAD_ATTR', 'LOAD_METHOD')

    def _libFromConst():
        """Return the library name from an expected LOAD_CONST instruction,
        or None if the next instruction is not a string constant.
        """
        instruction = next(instructions)
        if instruction.opname == 'LOAD_CONST':
            soname = instruction.argval
            if isinstance(soname, str):
                return soname
        return None

    while True:
        # Any next() below (including the one inside _libFromConst) may
        # exhaust the iterator; that simply ends the scan.
        try:
            instruction = next(instructions)

            if not instruction or instruction.opname not in expected_ops:
                continue

            name = instruction.argval
            if name == "ctypes":
                # Guesses ctypes has been imported as `import ctypes` and
                # the members are accessed like: ctypes.CDLL("library.so")
                #
                #   LOAD_GLOBAL 0 (ctypes) <--- we "are" here right now
                #   LOAD_ATTR 1 (CDLL)
                #   LOAD_CONST 1 ('library.so')
                #
                # In this case "strip" the `ctypes` by advancing and expecting
                # `LOAD_ATTR` next.
                instruction = next(instructions)
                if instruction.opname not in load_method:
                    continue
                name = instruction.argval

            if name in ("CDLL", "WinDLL", "OleDLL", "PyDLL"):
                # Guesses ctypes imports of this type: CDLL("library.so")
                #
                #   LOAD_GLOBAL 0 (CDLL) <--- we "are" here right now
                #   LOAD_CONST 1 ('library.so')

                yield _libFromConst()

            elif name in ("cdll", "windll", "oledll", "pydll"):
                # Guesses ctypes imports of these types:
                #
                #  * cdll.library (only valid on Windows)
                #
                #     LOAD_GLOBAL 0 (cdll) <--- we "are" here right now
                #     LOAD_ATTR 1 (library)
                #
                #  * cdll.LoadLibrary("library.so")
                #
                #     LOAD_GLOBAL 0 (cdll) <--- we "are" here right now
                #     LOAD_ATTR 1 (LoadLibrary)
                #     LOAD_CONST 1 ('library.so')
                instruction = next(instructions)
                if instruction.opname in load_method:
                    if instruction.argval == "LoadLibrary":
                        # Second type, needs to fetch one more instruction
                        yield _libFromConst()
                    else:
                        # First type
                        yield instruction.argval + ".dll"

            elif instruction.opname == 'LOAD_ATTR' and name in ("util",):
                # Guesses ctypes imports of these types::
                #
                #  ctypes.util.find_library('gs')
                #
                #     LOAD_GLOBAL 0 (ctypes)
                #     LOAD_ATTR 1 (util) <--- we "are" here right now
                #     LOAD_ATTR 1 (find_library)
                #     LOAD_CONST 1 ('gs')
                instruction = next(instructions)
                if instruction.opname in load_method:
                    if instruction.argval == "find_library":
                        libname = _libFromConst()
                        if libname:
                            lib = ctypes.util.find_library(libname)
                            if lib:
                                # On Windows, `find_library` may return
                                # a full pathname. See issue #1934
                                yield os.path.basename(lib)
        except StopIteration:
            break


# TODO Reuse this code with modulegraph implementation
def _resolveCtypesImports(cbinaries):
    """
    Completes ctypes BINARY entries for modules with their full path.

    Input is an iterable of c-binary-names (as found by
    `__scan_code_instruction_for_ctypes`). Output is a list of tuples
    ready to be appended to the ``binaries`` of a module.

    This function temporarily extends PATH, LD_LIBRARY_PATH or
    DYLD_LIBRARY_PATH (depending on the platform) by CONF['pathex']
    so shared libs will be searched there, too. The previous value is
    restored even if resolving a library raises.

    Example:
    >>> _resolveCtypesImports(['libgs.so'])
    [('libgs.so', '/usr/lib/libgs.so', 'BINARY')]

    """
    from ctypes.util import find_library
    from ..config import CONF

    if is_unix:
        envvar = "LD_LIBRARY_PATH"
    elif is_darwin:
        envvar = "DYLD_LIBRARY_PATH"
    else:
        envvar = "PATH"

    def _setPaths():
        # Prepend CONF['pathex'] to the search variable; return old value.
        path = os.pathsep.join(CONF['pathex'])
        old = compat.getenv(envvar)
        if old is not None:
            path = os.pathsep.join((path, old))
        compat.setenv(envvar, path)
        return old

    def _restorePaths(old):
        # Put the search variable back exactly as it was (unset if it was).
        if old is None:
            compat.unsetenv(envvar)
        else:
            compat.setenv(envvar, old)

    ret = []

    # Try to locate the shared library on disk. This is done by
    # executing ctypes.util.find_library prepending ImportTracker's
    # local paths to library search paths, then replaces original values.
    old = _setPaths()
    try:
        for cbin in cbinaries:
            cpath = find_library(os.path.splitext(cbin)[0])
            if is_unix:
                # CAVEAT: find_library() is not the correct function. Ctype's
                # documentation says that it is meant to resolve only the
                # filename (as a *compiler* does) not the full path. Anyway,
                # it works well enough on Windows and Mac. On Linux, we need
                # to implement more code to find out the full path.
                if cpath is None:
                    cpath = cbin
                # "man ld.so" says that we should first search LD_LIBRARY_PATH
                # and then the ldcache
                for d in compat.getenv(envvar, '').split(os.pathsep):
                    if os.path.isfile(os.path.join(d, cpath)):
                        cpath = os.path.join(d, cpath)
                        break
                else:
                    # Not found in any search directory: consult ldconfig.
                    if LDCONFIG_CACHE is None:
                        load_ldconfig_cache()
                    if cpath in LDCONFIG_CACHE:
                        cpath = LDCONFIG_CACHE[cpath]
                        assert os.path.isfile(cpath)
                    else:
                        cpath = None
            if cpath is None:
                # Skip warning message if cbin (basename of library) is
                # ignored. This prevents messages like:
                # 'W: library kernel32.dll required via ctypes not found'
                if not include_library(cbin):
                    continue
                logger.warning("library %s required via ctypes not found",
                               cbin)
            else:
                if not include_library(cpath):
                    continue
                ret.append((cbin, cpath, "BINARY"))
    finally:
        _restorePaths(old)
    return ret


LDCONFIG_CACHE = None  # cache the output of `/sbin/ldconfig -r`


def load_ldconfig_cache():
    """
    Create a cache of the `ldconfig`-output to call it only once.
    It contains thousands of libraries and running it on every dynlib
    is expensive.

    Fills the module-level dict ``LDCONFIG_CACHE`` mapping library names to
    paths. Does nothing if the cache is already populated; leaves an empty
    dict if no ``ldconfig`` executable can be found.
    """
    global LDCONFIG_CACHE

    if LDCONFIG_CACHE is not None:
        return

    from distutils.spawn import find_executable
    ldconfig = find_executable('ldconfig')
    if ldconfig is None:
        # If `ldconfig` is not found in $PATH, search it in some fixed
        # directories. Simply use a second call instead of fiddling
        # around with checks for empty env-vars and string-concat.
        ldconfig = find_executable('ldconfig',
                                   '/usr/sbin:/sbin:/usr/bin:/bin')

        # if we still couldn't find 'ldconfig' command
        if ldconfig is None:
            LDCONFIG_CACHE = {}
            return

    if is_freebsd:
        # This has a quite different format than other Unixes
        # [vagrant@freebsd-10 ~]$ ldconfig -r
        # /var/run/ld-elf.so.hints:
        #     search directories: /lib:/usr/lib:/usr/lib/compat:...
        #     0:-lgeom.5 => /lib/libgeom.so.5
        #     184:-lpython2.7.1 => /usr/local/lib/libpython2.7.so.1
        text = compat.exec_command(ldconfig, '-r')
        text = text.strip().splitlines()[2:]
        pattern = re.compile(r'^\s+\d+:-l(\S+)(\s.*)? => (\S+)')
    else:
        # Skip first line of the library list because it is just
        # an informative line and might contain localized characters.
        # Example of first line with local cs_CZ.UTF-8:
        #$ /sbin/ldconfig -r
        #V keši „/etc/ld.so.cache“ nalezeno knihoven: 2799
        #      libzvbi.so.0 (libc6,x86-64) => /lib64/libzvbi.so.0
        #      libzvbi-chains.so.0 (libc6,x86-64) => /lib64/libzvbi-chains.so.0
        text = compat.exec_command(ldconfig, '-p')
        text = text.strip().splitlines()[1:]
        pattern = re.compile(r'^\s+(\S+)(\s.*)? => (\S+)')

    LDCONFIG_CACHE = {}
    for line in text:
        # :fixme: this assumes library names do not contain whitespace
        m = pattern.match(line)
        if m is None:
            # Not a "name => path" entry (e.g. an informational line);
            # skip it instead of failing on `None.groups()`.
            logger.debug("Ignoring unrecognised line of ldconfig output: %r",
                         line)
            continue
        path = m.groups()[-1]
        if is_freebsd:
            # Insert `.so` at the end of the lib's basename. soname
            # and filename may have (different) trailing versions. We
            # assume the `.so` in the filename to mark the end of the
            # lib's basename.
            bname = os.path.basename(path).split('.so', 1)[0]
            name = 'lib' + m.group(1)
            assert name.startswith(bname)
            name = bname + '.so' + name[len(bname):]
        else:
            name = m.group(1)
        # ldconfig may know about several versions of the same lib,
        # e.g. different arch, different libc, etc. Use the first
        # entry.
        if name not in LDCONFIG_CACHE:
            LDCONFIG_CACHE[name] = path


def get_path_to_egg(path):
    """
    Return the path to the python egg file, if the path points to a
    file inside an egg (or to an egg directly).
    Return `None` otherwise.

    The path is walked upwards one component at a time; the first
    component ending in ``.egg`` (case-insensitive) that exists as a file
    or directory is returned.
    """
    # This assumes, eggs are not nested.
    # TODO add support for unpacked eggs and for new .whl packages.
    lastpath = None  # marker to stop recursion
    while path and path != lastpath:
        if os.path.splitext(path)[1].lower() == ".egg":
            if os.path.isfile(path) or os.path.isdir(path):
                return path
        lastpath = path
        path = os.path.dirname(path)
    return None


def is_path_to_egg(path):
    """
    Check if path points to a file inside a python egg file (or to an egg
    directly).
    """
    return get_path_to_egg(path) is not None