# Copyright 2020 The Emscripten Authors. All rights reserved.
# Emscripten is available under two separate licenses, the MIT license and the
# University of Illinois/NCSA Open Source License. Both these licenses can be
# found in the LICENSE file.

from __future__ import print_function

import atexit
import json
import logging
import multiprocessing
import os
import re
import shlex
import shutil
import subprocess
import sys
import tempfile
from subprocess import STDOUT, PIPE

from . import diagnostics
from . import response_file
from . import shared
from .toolchain_profiler import ToolchainProfiler
from .shared import Settings, CLANG_CC, CLANG_CXX, PYTHON
from .shared import LLVM_NM, EMCC, EMAR, EMXX, EMRANLIB, NODE_JS, WASM_LD, LLVM_AR
from .shared import LLVM_OPT, LLVM_LINK, LLVM_DIS, LLVM_AS, LLVM_OBJCOPY
from .shared import try_delete, run_process, check_call, exit_with_error
from .shared import configuration, path_from_root, EXPECTED_BINARYEN_VERSION
from .shared import asmjs_mangle, DEBUG, WINDOWS, JAVA
from .shared import EM_BUILD_VERBOSE, TEMP_DIR, print_compiler_stage, BINARYEN_ROOT
from .shared import CANONICAL_TEMP_DIR, LLVM_DWARFDUMP, demangle_c_symbol_name, asbytes
from .shared import get_emscripten_temp_dir, exe_suffix, WebAssembly, which, is_c_symbol

logger = logging.getLogger('building')

# Building
multiprocessing_pool = None
binaryen_checked = False

# internal caches
internal_nm_cache = {}
# cache results of nm - it can be slow to run
uninternal_nm_cache = {}
# Stores the object files contained in different archive files passed as input
ar_contents = {}
_is_ar_cache = {}
# the exports the user requested
user_requested_exports = []


class ObjectFileInfo(object):
  """Result of running llvm-nm on one object file.

  Holds the llvm-nm process return code and raw output, plus the parsed
  sets of defined, undefined and common symbols.
  """

  def __init__(self, returncode, output, defs=None, undefs=None, commons=None):
    # Use None sentinels instead of the previous `defs=set()`-style mutable
    # default arguments, which are shared across all calls.
    self.returncode = returncode
    self.output = output
    self.defs = defs if defs is not None else set()
    self.undefs = undefs if undefs is not None else set()
    self.commons = commons if commons is not None else set()

  def is_valid_for_nm(self):
    """True if llvm-nm exited successfully for this file."""
    return self.returncode == 0


# llvm-ar appears to just use basenames inside archives. as a result, files
# with the same basename will trample each other when we extract them. to help
# warn of such situations, we warn if there are duplicate entries in the
# archive
def warn_if_duplicate_entries(archive_contents, archive_filename):
  if len(archive_contents) != len(set(archive_contents)):
    msg = '%s: archive file contains duplicate entries. This is not supported by emscripten. Only the last member with a given name will be linked in which can result in undefined symbols. You should either rename your source files, or use `emar` to create your archives which works around this issue.' % archive_filename
    # Count each entry once up front so the duplicate scan is O(n) instead of
    # rescanning the list tail for every element. Duplicates are reported in
    # order of first occurrence, as before.
    counts = {}
    for entry in archive_contents:
      counts[entry] = counts.get(entry, 0) + 1
    warned = set()
    for curr in archive_contents:
      if counts[curr] > 1 and curr not in warned:
        msg += '\n duplicate: %s' % curr
        warned.add(curr)
    diagnostics.warning('emcc', msg)


# This function creates a temporary directory specified by the 'dir' field in
# the returned dictionary. Caller is responsible for cleaning up those files
# after done.
def extract_archive_contents(archive_file):
  lines = run_process([LLVM_AR, 't', archive_file], stdout=PIPE).stdout.splitlines()
  # ignore empty lines
  contents = [l for l in lines if len(l)]
  if len(contents) == 0:
    logger.debug('Archive %s appears to be empty (recommendation: link an .so instead of .a)' % archive_file)
    return {
      'returncode': 0,
      'dir': None,
      'files': []
    }

  # `ar` files can only contain filenames. Just to be sure, verify that each
  # file has only a filename component and is not absolute
  for f in contents:
    assert not os.path.dirname(f)
    assert not os.path.isabs(f)

  warn_if_duplicate_entries(contents, archive_file)

  # create temp dir
  temp_dir = tempfile.mkdtemp('_archive_contents', 'emscripten_temp_')

  # extract file in temp dir
  proc = run_process([LLVM_AR, 'xo', archive_file], stdout=PIPE, stderr=STDOUT, cwd=temp_dir)
  abs_contents = [os.path.join(temp_dir, c) for c in contents]

  # check that all files were created
  missing_contents = [x for x in abs_contents if not os.path.exists(x)]
  if missing_contents:
    # Bug fix: report the archive itself; previously this interpolated `f`,
    # the stale loop variable from the validation loop above, which named
    # only the last archive member rather than the archive.
    exit_with_error('llvm-ar failed to extract file(s) ' + str(missing_contents) + ' from archive file ' + archive_file + '! Error:' + str(proc.stdout))

  return {
    'returncode': proc.returncode,
    'dir': temp_dir,
    'files': abs_contents
  }


# Due to a python pickling issue, the following two functions must be at top
# level, or multiprocessing pool spawn won't find them.
def g_llvm_nm_uncached(filename):
  return llvm_nm_uncached(filename)


def g_multiprocessing_initializer(*args):
  """Pool-child initializer: apply `KEY=value` pairs to the child process.

  EMCC_POOL_CWD selects the child working directory; any other key is set
  in the child environment.
  """
  for item in args:
    (key, value) = item.split('=', 1)
    if key == 'EMCC_POOL_CWD':
      os.chdir(value)
    else:
      os.environ[key] = value


def unique_ordered(values):
  """return a list of unique values in an input list, without changing order
  (list(set(.)) would change order randomly).
  """
  seen = set()

  def check(value):
    if value in seen:
      return False
    seen.add(value)
    return True

  return list(filter(check, values))


# clear internal caches. this is not normally needed, except if the clang/LLVM
# used changes inside this invocation of Building, which can happen in the benchmarker
# when it compares different builds.
def clear():
  """Drop all cached llvm-nm / archive state.

  Not normally needed; used when the clang/LLVM in use changes within one
  process (e.g. the benchmarker comparing builds).
  """
  internal_nm_cache.clear()
  uninternal_nm_cache.clear()
  ar_contents.clear()
  _is_ar_cache.clear()


def get_num_cores():
  # EMCC_CORES, if set, overrides the detected CPU count.
  return int(os.environ.get('EMCC_CORES', multiprocessing.cpu_count()))


# Multiprocessing pools are very slow to build up and tear down, and having
# several pools throughout the application has a problem of overallocating
# child processes. Therefore maintain a single centralized pool that is shared
# between all pooled task invocations.
def get_multiprocessing_pool():
  global multiprocessing_pool
  if not multiprocessing_pool:
    cores = get_num_cores()
    if DEBUG:
      # When in EMCC_DEBUG mode, only use a single core in the pool, so that
      # logging is not all jumbled up.
      cores = 1

    # If running with one core only, create a mock instance of a pool that does not
    # actually spawn any new subprocesses. Very useful for internal debugging.
    if cores == 1:
      class FakeMultiprocessor(object):
        def map(self, func, tasks, *args, **kwargs):
          results = []
          for t in tasks:
            results += [func(t)]
          return results

        def map_async(self, func, tasks, *args, **kwargs):
          class Result:
            def __init__(self, func, tasks):
              self.func = func
              self.tasks = tasks

            def get(self, timeout):
              # NOTE(review): `timeout` is ignored, and this reads the
              # enclosing `func`/`tasks` via closure rather than
              # self.func/self.tasks — harmless here, but worth confirming
              # before reuse.
              results = []
              for t in tasks:
                results += [func(t)]
              return results

          return Result(func, tasks)

      multiprocessing_pool = FakeMultiprocessor()
    else:
      child_env = [
        # Multiprocessing pool children must have their current working
        # directory set to a safe path that is guaranteed not to die in
        # between of executing commands, or otherwise the pool children will
        # have trouble spawning subprocesses of their own.
        'EMCC_POOL_CWD=' + path_from_root(),
        # Multiprocessing pool children can't spawn their own linear number of
        # children, that could cause a quadratic amount of spawned processes.
        'EMCC_CORES=1'
      ]
      multiprocessing_pool = multiprocessing.Pool(processes=cores, initializer=g_multiprocessing_initializer, initargs=child_env)

    def close_multiprocessing_pool():
      global multiprocessing_pool
      try:
        # Shut down the pool explicitly, because leaving that for Python to do at process shutdown is buggy and can generate
        # noisy "WindowsError: [Error 5] Access is denied" spam which is not fatal.
        multiprocessing_pool.terminate()
        multiprocessing_pool.join()
        multiprocessing_pool = None
      except OSError as e:
        # Mute the "WindowsError: [Error 5] Access is denied" errors, raise all others through
        # (the platform guard short-circuits first, so WindowsError is only
        # referenced on Windows where it exists).
        if not (sys.platform.startswith('win') and isinstance(e, WindowsError) and e.winerror == 5):
          raise
    atexit.register(close_multiprocessing_pool)

  return multiprocessing_pool


# .. but for Popen, we cannot have doublequotes, so provide functionality to
# remove them when needed.
def remove_quotes(arg):
  """Strip one level of matching single or double quotes from `arg`
  (recursing into lists), unescaping embedded quotes of the same kind."""
  if isinstance(arg, list):
    return [remove_quotes(a) for a in arg]

  if arg.startswith('"') and arg.endswith('"'):
    return arg[1:-1].replace('\\"', '"')
  elif arg.startswith("'") and arg.endswith("'"):
    return arg[1:-1].replace("\\'", "'")
  else:
    return arg


def get_building_env(cflags=[]):
  """Return a copy of os.environ with CC/CXX/AR/LD/etc. pointing at the
  em* tools, suitable for driving configure/make-style builds."""
  env = os.environ.copy()
  # point CC etc. to the em* tools.
  env['CC'] = EMCC
  env['CXX'] = EMXX
  env['AR'] = EMAR
  env['LD'] = EMCC
  env['NM'] = LLVM_NM
  env['LDSHARED'] = EMCC
  env['RANLIB'] = EMRANLIB
  env['EMSCRIPTEN_TOOLS'] = path_from_root('tools')
  if cflags:
    env['CFLAGS'] = env['EMMAKEN_CFLAGS'] = ' '.join(cflags)
  env['HOST_CC'] = CLANG_CC
  env['HOST_CXX'] = CLANG_CXX
  env['HOST_CFLAGS'] = "-W" # if set to nothing, CFLAGS is used, which we don't want
  env['HOST_CXXFLAGS'] = "-W" # if set to nothing, CXXFLAGS is used, which we don't want
  env['PKG_CONFIG_LIBDIR'] = path_from_root('system', 'local', 'lib', 'pkgconfig') + os.path.pathsep + path_from_root('system', 'lib', 'pkgconfig')
  env['PKG_CONFIG_PATH'] = os.environ.get('EM_PKG_CONFIG_PATH', '')
  env['EMSCRIPTEN'] = path_from_root()
  env['PATH'] = path_from_root('system', 'bin') + os.pathsep + env['PATH']
  env['CROSS_COMPILE'] = path_from_root('em') # produces /path/to/emscripten/em , which then can have 'cc', 'ar', etc appended to it
  return env


# Returns a clone of the given environment with all directories that contain
# sh.exe removed from the PATH. Used to work around CMake limitation with
# MinGW Makefiles, where sh.exe is not allowed to be present.
def remove_sh_exe_from_path(env):
  env = env.copy()
  if not WINDOWS:
    return env
  path = env['PATH'].split(';')
  path = [p for p in path if not os.path.exists(os.path.join(p, 'sh.exe'))]
  env['PATH'] = ';'.join(path)
  return env


def handle_cmake_toolchain(args, env):
  """Augment a cmake invocation with the Emscripten toolchain/emulator and
  a suitable Windows generator. Returns the possibly-modified (args, env)."""
  def has_substr(args, substr):
    return any(substr in s for s in args)

  # Append the Emscripten toolchain file if the user didn't specify one.
  if not has_substr(args, '-DCMAKE_TOOLCHAIN_FILE'):
    args.append('-DCMAKE_TOOLCHAIN_FILE=' + path_from_root('cmake', 'Modules', 'Platform', 'Emscripten.cmake'))
  node_js = NODE_JS

  if not has_substr(args, '-DCMAKE_CROSSCOMPILING_EMULATOR'):
    # NOTE(review): '\"' is just '"', so this replace is a no-op — it looks
    # like '\\"' (escaping embedded quotes) was intended; confirm before
    # changing since it affects CMake argument quoting.
    node_js = NODE_JS[0].replace('"', '\"')
    args.append('-DCMAKE_CROSSCOMPILING_EMULATOR="%s"' % node_js)

  # On Windows specify MinGW Makefiles or ninja if we have them and no other
  # toolchain was specified, to keep CMake from pulling in a native Visual
  # Studio, or Unix Makefiles.
  if WINDOWS and '-G' not in args:
    if which('mingw32-make'):
      args += ['-G', 'MinGW Makefiles']
    elif which('ninja'):
      args += ['-G', 'Ninja']

  # CMake has a requirement that it wants sh.exe off PATH if MinGW Makefiles
  # is being used. This happens quite often, so do this automatically on
  # behalf of the user. See
  # http://www.cmake.org/Wiki/CMake_MinGW_Compiler_Issues
  if WINDOWS and 'MinGW Makefiles' in args:
    env = remove_sh_exe_from_path(env)

  return (args, env)


def configure(args, stdout=None, stderr=None, env=None, cflags=[], **kwargs):
  """Run a project configure step (./configure or cmake) with an
  Emscripten-aware environment. EM_BUILD_VERBOSE controls output capture."""
  if env:
    env = env.copy()
  else:
    env = get_building_env(cflags=cflags)
  if 'cmake' in args[0]:
    # Note: EMMAKEN_JUST_CONFIGURE shall not be enabled when configuring with
    # CMake. This is because CMake does expect to be able to do
    # config-time builds with emcc.
    args, env = handle_cmake_toolchain(args, env)
  else:
    # When we configure via a ./configure script, don't do config-time
    # compilation with emcc, but instead do builds natively with Clang. This
    # is a heuristic emulation that may or may not work.
    env['EMMAKEN_JUST_CONFIGURE'] = '1'
  if EM_BUILD_VERBOSE >= 2:
    stdout = None
  if EM_BUILD_VERBOSE >= 1:
    stderr = None
    print('configure: ' + ' '.join(args), file=sys.stderr)
  run_process(args, stdout=stdout, stderr=stderr, env=env, **kwargs)


def make(args, stdout=None, stderr=None, env=None, cflags=[], **kwargs):
  """Run a project build step (make/mingw32-make) with an Emscripten-aware
  environment. EM_BUILD_VERBOSE controls output capture."""
  if env is None:
    env = get_building_env(cflags=cflags)

  # On Windows prefer building with mingw32-make instead of make, if it exists.
  if WINDOWS:
    if args[0] == 'make':
      mingw32_make = which('mingw32-make')
      if mingw32_make:
        args[0] = mingw32_make

    if 'mingw32-make' in args[0]:
      env = remove_sh_exe_from_path(env)

  # On Windows, run the execution through shell to get PATH expansion and
  # executable extension lookup, e.g. 'sdl2-config' will match with
  # 'sdl2-config.bat' in PATH.
  if EM_BUILD_VERBOSE >= 2:
    stdout = None
  if EM_BUILD_VERBOSE >= 1:
    stderr = None
    print('make: ' + ' '.join(args), file=sys.stderr)
  run_process(args, stdout=stdout, stderr=stderr, env=env, shell=WINDOWS, **kwargs)


def make_paths_absolute(f):
  # Flags (leading '-') are passed through untouched; everything else is
  # treated as a path and made absolute.
  if f.startswith('-'): # skip flags
    return f
  else:
    return os.path.abspath(f)


# Runs llvm-nm in parallel for the given list of files.
# The results are populated in uninternal_nm_cache
# multiprocessing_pool: An existing multiprocessing pool to reuse for the operation, or None
# to have the function allocate its own.
def parallel_llvm_nm(files):
  """Run llvm-nm over `files` using the shared pool; populate
  uninternal_nm_cache and return the list of ObjectFileInfo results."""
  with ToolchainProfiler.profile_block('parallel_llvm_nm'):
    pool = get_multiprocessing_pool()
    object_contents = pool.map(g_llvm_nm_uncached, files)

    for i, file in enumerate(files):
      if object_contents[i].returncode != 0:
        logger.debug('llvm-nm failed on file ' + file + ': return code ' + str(object_contents[i].returncode) + ', error: ' + object_contents[i].output)
      uninternal_nm_cache[file] = object_contents[i]
    return object_contents


def read_link_inputs(files):
  """Pre-scan all link inputs: extract archives into temp dirs (recorded in
  ar_contents, cleaned up at exit) and run llvm-nm over every not-yet-cached
  object file, all in parallel."""
  with ToolchainProfiler.profile_block('read_link_inputs'):
    # Before performing the link, we need to look at each input file to determine which symbols
    # each of them provides. Do this in multiple parallel processes.
    archive_names = [] # .a files passed in to the command line to the link
    object_names = [] # .o/.bc files passed in to the command line to the link
    for f in files:
      absolute_path_f = make_paths_absolute(f)

      if absolute_path_f not in ar_contents and is_ar(absolute_path_f):
        archive_names.append(absolute_path_f)
      elif absolute_path_f not in uninternal_nm_cache and is_bitcode(absolute_path_f):
        object_names.append(absolute_path_f)

    # Archives contain objects, so process all archives first in parallel to obtain the object files in them.
    pool = get_multiprocessing_pool()
    object_names_in_archives = pool.map(extract_archive_contents, archive_names)

    def clean_temporary_archive_contents_directory(directory):
      def clean_at_exit():
        try_delete(directory)
      if directory:
        atexit.register(clean_at_exit)

    for n in range(len(archive_names)):
      if object_names_in_archives[n]['returncode'] != 0:
        raise Exception('llvm-ar failed on archive ' + archive_names[n] + '!')
      ar_contents[archive_names[n]] = object_names_in_archives[n]['files']
      clean_temporary_archive_contents_directory(object_names_in_archives[n]['dir'])

    for o in object_names_in_archives:
      for f in o['files']:
        if f not in uninternal_nm_cache:
          object_names.append(f)

    # Next, extract symbols from all object files (either standalone or inside archives we just extracted)
    # The results are not used here directly, but populated to llvm-nm cache structure.
    parallel_llvm_nm(object_names)


def llvm_backend_args():
  """Return the extra flags passed to the LLVM backend (via -mllvm)."""
  # disable slow and relatively unimportant optimization passes
  args = ['-combiner-global-alias-analysis=false']

  # asm.js-style exception handling
  if Settings.DISABLE_EXCEPTION_CATCHING != 1:
    args += ['-enable-emscripten-cxx-exceptions']
  if Settings.DISABLE_EXCEPTION_CATCHING == 2:
    allowed = ','.join(Settings.EXCEPTION_CATCHING_ALLOWED or ['__fake'])
    args += ['-emscripten-cxx-exceptions-allowed=' + allowed]

  # asm.js-style setjmp/longjmp handling
  args += ['-enable-emscripten-sjlj']

  # better (smaller, sometimes faster) codegen, see binaryen#1054
  # and https://bugs.llvm.org/show_bug.cgi?id=39488
  args += ['-disable-lsr']

  return args


def link_to_object(linker_inputs, target):
  # link using lld for the wasm backend with wasm object files,
  # otherwise for linking of bitcode we must use our python
  # code (necessary for asm.js, for wasm bitcode see
  # https://bugs.llvm.org/show_bug.cgi?id=40654)
  if not Settings.LTO:
    link_lld(linker_inputs + ['--relocatable'], target)
  else:
    link(linker_inputs, target)


def link_llvm(linker_inputs, target):
  # runs llvm-link to link things.
  cmd = [LLVM_LINK] + linker_inputs + ['-o', target]
  cmd = get_command_with_possible_response_file(cmd)
  print_compiler_stage(cmd)
  output = run_process(cmd, stdout=PIPE).stdout
  assert os.path.exists(target) and (output is None or 'Could not open input file' not in output), 'Linking error: ' + output
  return target


def lld_flags_for_executable(external_symbol_list):
  """Build the wasm-ld flag list used when producing an executable (not a
  relocatable object), derived from the current Settings."""
  cmd = []
  if external_symbol_list:
    undefs = configuration.get_temp_files().get('.undefined').name
    with open(undefs, 'w') as f:
      f.write('\n'.join(external_symbol_list))
    cmd.append('--allow-undefined-file=%s' % undefs)
  else:
    cmd.append('--allow-undefined')

  # wasi does not import the memory (but for JS it is efficient to do so,
  # as it allows us to set up memory, preload files, etc. even before the
  # wasm module arrives)
  if not Settings.STANDALONE_WASM:
    cmd.append('--import-memory')
    cmd.append('--import-table')
  else:
    cmd.append('--export-table')

  if Settings.USE_PTHREADS:
    cmd.append('--shared-memory')

  # wasm-ld can strip debug info for us. this strips both the Names
  # section and DWARF, so we can only use it when we don't need any of
  # those things.
  if Settings.DEBUG_LEVEL < 2 and (not Settings.EMIT_SYMBOL_MAP and
                                   not Settings.PROFILING_FUNCS and
                                   not Settings.ASYNCIFY):
    cmd.append('--strip-debug')

  if Settings.RELOCATABLE:
    if Settings.MAIN_MODULE == 2 or Settings.SIDE_MODULE == 2:
      cmd.append('--no-export-dynamic')
    else:
      cmd.append('--no-gc-sections')
      cmd.append('--export-dynamic')

  if Settings.LINKABLE:
    cmd.append('--export-all')
  else:
    c_exports = [e for e in Settings.EXPORTED_FUNCTIONS if is_c_symbol(e)]
    # Strip the leading underscores
    c_exports = [demangle_c_symbol_name(e) for e in c_exports]
    if external_symbol_list:
      # Filter out external/JS symbols
      c_exports = [e for e in c_exports if e not in external_symbol_list]
    for export in c_exports:
      cmd += ['--export', export]

  if Settings.RELOCATABLE:
    cmd.append('--experimental-pic')
    if Settings.SIDE_MODULE:
      cmd.append('-shared')
    else:
      cmd.append('-pie')

  if not Settings.SIDE_MODULE:
    cmd += [
      '-z', 'stack-size=%s' % Settings.TOTAL_STACK,
      '--initial-memory=%d' % Settings.INITIAL_MEMORY,
    ]

  if Settings.STANDALONE_WASM:
    # when Settings.EXPECT_MAIN is set we fall back to wasm-ld default of _start
    if not Settings.EXPECT_MAIN:
      cmd += ['--entry=_initialize']
  else:
    if Settings.EXPECT_MAIN and not Settings.IGNORE_MISSING_MAIN:
      cmd += ['--entry=main']
    else:
      cmd += ['--no-entry']

  if not Settings.ALLOW_MEMORY_GROWTH:
    cmd.append('--max-memory=%d' % Settings.INITIAL_MEMORY)
  elif Settings.MAXIMUM_MEMORY != -1:
    cmd.append('--max-memory=%d' % Settings.MAXIMUM_MEMORY)

  if not Settings.RELOCATABLE:
    cmd.append('--global-base=%s' % Settings.GLOBAL_BASE)

  return cmd


def link_lld(args, target, external_symbol_list=None):
  """Link `args` into `target` using wasm-ld; for non-relocatable output the
  executable flag set from lld_flags_for_executable() is appended."""
  if not os.path.exists(WASM_LD):
    exit_with_error('linker binary not found in LLVM directory: %s', WASM_LD)
  # runs lld to link things.
  # lld doesn't currently support --start-group/--end-group since the
  # semantics are more like the windows linker where there is no need for
  # grouping.
  args = [a for a in args if a not in ('--start-group', '--end-group')]

  # Emscripten currently expects linkable output (SIDE_MODULE/MAIN_MODULE) to
  # include all archive contents.
  if Settings.LINKABLE:
    args.insert(0, '--whole-archive')
    args.append('--no-whole-archive')

  if Settings.STRICT:
    args.append('--fatal-warnings')

  cmd = [WASM_LD, '-o', target] + args
  for a in llvm_backend_args():
    cmd += ['-mllvm', a]

  # For relocatable output (generating an object file) we don't pass any of the
  # normal linker flags that are used when building an executable
  if '--relocatable' not in args and '-r' not in args:
    cmd += lld_flags_for_executable(external_symbol_list)

  print_compiler_stage(cmd)
  cmd = get_command_with_possible_response_file(cmd)
  check_call(cmd)
  return target


def link(files, target, force_archive_contents=False, just_calculate=False):
  """Link bitcode `files` into `target` via llvm-link, emulating GNU ld
  archive semantics (pull a .o from a .a only if it resolves an undefined
  symbol, honoring --whole-archive and --start/--end-group).

  If just_calculate is True, return the final file list instead of linking.
  """
  # "Full-featured" linking: looks into archives (duplicates lld functionality)
  actual_files = []
  # Tracking unresolveds is necessary for .a linking, see below.
  # Specify all possible entry points to seed the linking process.
  # For a simple application, this would just be "main".
  unresolved_symbols = set([func[1:] for func in Settings.EXPORTED_FUNCTIONS])
  resolved_symbols = set()
  # Paths of already included object files from archives.
  added_contents = set()
  has_ar = False
  for f in files:
    if not f.startswith('-'):
      has_ar = has_ar or is_ar(make_paths_absolute(f))

  # If we have only one archive or the force_archive_contents flag is set,
  # then we will add every object file we see, regardless of whether it
  # resolves any undefined symbols.
  force_add_all = len(files) == 1 or force_archive_contents

  # Considers an object file for inclusion in the link. The object is included
  # if force_add=True or if the object provides a currently undefined symbol.
  # If the object is included, the symbol tables are updated and the function
  # returns True.
  def consider_object(f, force_add=False):
    new_symbols = llvm_nm(f)
    # Check if the object was valid according to llvm-nm. It also accepts
    # native object files.
    if not new_symbols.is_valid_for_nm():
      diagnostics.warning('emcc', 'object %s is not valid according to llvm-nm, cannot link', f)
      return False
    # Check the object is valid for us, and not a native object file.
    if not is_bitcode(f):
      exit_with_error('unknown file type: %s', f)
    provided = new_symbols.defs.union(new_symbols.commons)
    do_add = force_add or not unresolved_symbols.isdisjoint(provided)
    if do_add:
      logger.debug('adding object %s to link (forced: %d)' % (f, force_add))
      # Update resolved_symbols table with newly resolved symbols
      resolved_symbols.update(provided)
      # Update unresolved_symbols table by adding newly unresolved symbols and
      # removing newly resolved symbols.
      unresolved_symbols.update(new_symbols.undefs.difference(resolved_symbols))
      unresolved_symbols.difference_update(provided)
      actual_files.append(f)
    return do_add

  # Traverse a single archive. The object files are repeatedly scanned for
  # newly satisfied symbols until no new symbols are found. Returns true if
  # any object files were added to the link.
  def consider_archive(f, force_add):
    added_any_objects = False
    loop_again = True
    logger.debug('considering archive %s' % (f))
    contents = ar_contents[f]
    while loop_again: # repeatedly traverse until we have everything we need
      loop_again = False
      for content in contents:
        if content in added_contents:
          continue
        # Link in the .o if it provides symbols, *or* this is a singleton archive (which is
        # apparently an exception in gcc ld)
        if consider_object(content, force_add=force_add):
          added_contents.add(content)
          loop_again = True
          added_any_objects = True
    logger.debug('done running loop of archive %s' % (f))
    return added_any_objects

  read_link_inputs([x for x in files if not x.startswith('-')])

  # Rescan a group of archives until we don't find any more objects to link.
  def scan_archive_group(group):
    loop_again = True
    logger.debug('starting archive group loop')
    while loop_again:
      loop_again = False
      for archive in group:
        if consider_archive(archive, force_add=False):
          loop_again = True
    logger.debug('done with archive group loop')

  current_archive_group = None
  in_whole_archive = False
  for f in files:
    absolute_path_f = make_paths_absolute(f)
    if f.startswith('-'):
      if f in ['--start-group', '-(']:
        assert current_archive_group is None, 'Nested --start-group, missing --end-group?'
        current_archive_group = []
      elif f in ['--end-group', '-)']:
        assert current_archive_group is not None, '--end-group without --start-group'
        scan_archive_group(current_archive_group)
        current_archive_group = None
      elif f in ['--whole-archive', '-whole-archive']:
        in_whole_archive = True
      elif f in ['--no-whole-archive', '-no-whole-archive']:
        in_whole_archive = False
      else:
        # Command line flags should already be vetted by the time this method
        # is called, so this is an internal error
        assert False, 'unsupported link flag: ' + f
    elif is_ar(absolute_path_f):
      # Extract object files from ar archives, and link according to gnu ld semantics
      # (link in an entire .o from the archive if it supplies symbols still unresolved)
      consider_archive(absolute_path_f, in_whole_archive or force_add_all)
      # If we're inside a --start-group/--end-group section, add to the list
      # so we can loop back around later.
      if current_archive_group is not None:
        current_archive_group.append(absolute_path_f)
    elif is_bitcode(absolute_path_f):
      if has_ar:
        consider_object(f, force_add=True)
      else:
        # If there are no archives then we can simply link all valid object
        # files and skip the symbol table stuff.
        actual_files.append(f)
    else:
      exit_with_error('unknown file type: %s', f)

  # We have to consider the possibility that --start-group was used without a matching
  # --end-group; GNU ld permits this behavior and implicitly treats the end of the
  # command line as having an --end-group.
  if current_archive_group:
    logger.debug('--start-group without matching --end-group, rescanning')
    scan_archive_group(current_archive_group)
    current_archive_group = None

  try_delete(target)

  # Finish link
  # tolerate people trying to link a.so a.so etc.
  actual_files = unique_ordered(actual_files)
  if just_calculate:
    # just calculating; return the link arguments which is the final list of files to link
    return actual_files

  logger.debug('emcc: linking: %s to %s', actual_files, target)
  link_llvm(actual_files, target)
  return target


def get_command_with_possible_response_file(cmd):
  """Return `cmd` unchanged, or with arguments moved into an @response-file
  when the joined command line would be too long."""
  # 8k is a bit of an arbitrary limit, but a reasonable one
  # for max command line size before we use a response file
  if len(' '.join(cmd)) <= 8192:
    return cmd

  logger.debug('using response file for %s' % cmd[0])
  filename = response_file.create_response_file(cmd[1:], TEMP_DIR)
  new_cmd = [cmd[0], "@" + filename]
  return new_cmd


# LLVM optimizations
# @param opt A list of LLVM optimization parameters
def llvm_opt(filename, opts, out=None):
  """Run llvm opt with `opts` on `filename` (or a list of inputs, in which
  case `out` is required). Without `out`, the input file is replaced."""
  inputs = filename
  if not isinstance(inputs, list):
    inputs = [inputs]
  else:
    assert out, 'must provide out if llvm_opt on a list of inputs'
  assert len(opts), 'should not call opt with nothing to do'
  opts = opts[:]

  target = out or (filename + '.opt.bc')
  cmd = [LLVM_OPT] + inputs + opts + ['-o', target]
  cmd = get_command_with_possible_response_file(cmd)
  print_compiler_stage(cmd)
  check_call(cmd)
  assert os.path.exists(target), 'llvm optimizer emitted no output.'
  if not out:
    shutil.move(filename + '.opt.bc', filename)
  return target


def llvm_dis(input_filename, output_filename):
  # LLVM binary ==> LLVM assembly
  try_delete(output_filename)
  output = run_process([LLVM_DIS, input_filename, '-o', output_filename], stdout=PIPE).stdout
  assert os.path.exists(output_filename), 'Could not create .ll file: ' + output


def llvm_as(input_filename, output_filename):
  # LLVM assembly ==> LLVM binary
  try_delete(output_filename)
  output = run_process([LLVM_AS, input_filename, '-o', output_filename], stdout=PIPE).stdout
  assert os.path.exists(output_filename), 'Could not create bc file: ' + output


def parse_symbols(output, include_internal=False):
  """Parse llvm-nm textual output into an ObjectFileInfo (defined, undefined
  and common symbol sets). With include_internal, local t/d symbols are
  counted as defined too."""
  defs = []
  undefs = []
  commons = []
  for line in output.split('\n'):
    if not line or line[0] == '#':
      continue
    # e.g. filename.o: , saying which file it's from
    if ':' in line:
      continue
    parts = [seg for seg in line.split(' ') if len(seg)]
    # pnacl-nm will print zero offsets for bitcode, and newer llvm-nm will print present symbols
    # as -------- T name
    if len(parts) == 3 and parts[0] == "--------" or re.match(r'^[\da-f]{8}$', parts[0]):
      parts.pop(0)
    if len(parts) == 2:
      # ignore lines with absolute offsets, these are not bitcode anyhow
      # e.g. |00000630 t d_source_name|
      status, symbol = parts
      if status == 'U':
        undefs.append(symbol)
      elif status == 'C':
        commons.append(symbol)
      elif (not include_internal and status == status.upper()) or \
           (include_internal and status in ['W', 't', 'T', 'd', 'D']):
        # FIXME: using WTD in the previous line fails due to llvm-nm behavior on macOS,
        # so for now we assume all uppercase are normally defined external symbols
        defs.append(symbol)
  return ObjectFileInfo(0, None, set(defs), set(undefs), set(commons))


def llvm_nm_uncached(filename, stdout=PIPE, stderr=PIPE, include_internal=False):
  # LLVM binary ==> list of symbols
  proc = run_process([LLVM_NM, filename], stdout=stdout, stderr=stderr, check=False)
  if proc.returncode == 0:
    return parse_symbols(proc.stdout, include_internal)
  else:
    return ObjectFileInfo(proc.returncode, str(proc.stdout) + str(proc.stderr))


def llvm_nm(filename, stdout=PIPE, stderr=PIPE, include_internal=False):
  """Cached wrapper around llvm_nm_uncached, keyed on the absolute path
  (separate caches for include_internal True/False)."""
  # Always use absolute paths to maximize cache usage
  filename = os.path.abspath(filename)

  if include_internal and filename in internal_nm_cache:
    return internal_nm_cache[filename]
  elif not include_internal and filename in uninternal_nm_cache:
    return uninternal_nm_cache[filename]

  ret = llvm_nm_uncached(filename, stdout, stderr, include_internal)

  if ret.returncode != 0:
    logger.debug('llvm-nm failed on file ' + filename + ': return code ' + str(ret.returncode) + ', error: ' + ret.output)

  # Even if we fail, write the results to the NM cache so that we don't keep trying to llvm-nm the failing file again later.
  if include_internal:
    internal_nm_cache[filename] = ret
  else:
    uninternal_nm_cache[filename] = ret

  return ret


def emcc(filename, args=[], output_filename=None, stdout=None, stderr=None, env=None):
  """Compile `filename` with emcc; output defaults to filename + '.o'."""
  if output_filename is None:
    output_filename = filename + '.o'
  try_delete(output_filename)
  run_process([EMCC, filename] + args + ['-o', output_filename], stdout=stdout, stderr=stderr, env=env)


def emar(action, output_filename, filenames, stdout=None, stderr=None, env=None):
  """Run emar with `action` on `filenames`, passing them via a response
  file to avoid command-line length limits."""
  try_delete(output_filename)
  response_filename = response_file.create_response_file(filenames, TEMP_DIR)
  cmd = [EMAR, action, output_filename] + ['@' + response_filename]
  try:
    run_process(cmd, stdout=stdout, stderr=stderr, env=env)
  finally:
    try_delete(response_filename)

  if 'c' in action:
    assert os.path.exists(output_filename), 'emar could not create output file: ' + output_filename


def can_inline():
  # Inlining is only permitted when no inlining limit is set.
  return Settings.INLINING_LIMIT == 0


def get_safe_internalize():
  """Return the opt flags to internalize all symbols except the exported
  API (empty when LINKABLE, since then nothing may be internalized)."""
  if Settings.LINKABLE:
    return [] # do not internalize anything

  exps = Settings.EXPORTED_FUNCTIONS
  internalize_public_api = '-internalize-public-api-'
  internalize_list = ','.join([demangle_c_symbol_name(exp) for exp in exps])

  # EXPORTED_FUNCTIONS can potentially be very large.
  # 8k is a bit of an arbitrary limit, but a reasonable one
  # for max command line size before we use a response file
  if len(internalize_list) > 8192:
    logger.debug('using response file for EXPORTED_FUNCTIONS in internalize')
    finalized_exports = '\n'.join([exp[1:] for exp in exps])
    internalize_list_file = configuration.get_temp_files().get('.response').name
    with open(internalize_list_file, 'w') as f:
      f.write(finalized_exports)
    internalize_public_api += 'file=' + internalize_list_file
  else:
    internalize_public_api += 'list=' + internalize_list

  # internalize carefully, llvm 3.2 will remove even main if not told not to
  return ['-internalize', internalize_public_api]


def opt_level_to_str(opt_level, shrink_level=0):
  # convert opt_level/shrink_level pair to a string argument like -O1
  if opt_level == 0:
    return '-O0'
  if shrink_level == 1:
    return '-Os'
  elif shrink_level >= 2:
    return '-Oz'
  else:
    return '-O' + str(min(opt_level, 3))


def js_optimizer(filename, passes):
  """Run the JS optimizer tool over `filename` with the given passes,
  converting a subprocess failure into a fatal emcc error."""
  from . import js_optimizer
  try:
    return js_optimizer.run(filename, passes)
  except subprocess.CalledProcessError as e:
    exit_with_error("'%s' failed (%d)", ' '.join(e.cmd), e.returncode)


# run JS optimizer on some JS, ignoring asm.js contents if any - just run on it all
def acorn_optimizer(filename, passes, extra_info=None, return_output=False):
  optimizer = path_from_root('tools', 'acorn-optimizer.js')
  original_filename = filename
  if extra_info is not None:
    temp_files = configuration.get_temp_files()
    temp = temp_files.get('.js').name
    shutil.copyfile(filename, temp)
    with open(temp, 'a') as f:
      f.write('// EXTRA_INFO: ' + extra_info)
    filename = temp
  cmd = NODE_JS + [optimizer, filename] + passes
  # Keep JS code comments intact through the acorn optimization pass so that JSDoc comments
  # will be carried over to a later Closure run.
921 if Settings.USE_CLOSURE_COMPILER: 922 cmd += ['--closureFriendly'] 923 if not return_output: 924 next = original_filename + '.jso.js' 925 configuration.get_temp_files().note(next) 926 check_call(cmd, stdout=open(next, 'w')) 927 save_intermediate(next, '%s.js' % passes[0]) 928 return next 929 output = check_call(cmd, stdout=PIPE).stdout 930 return output 931 932 933# evals ctors. if binaryen_bin is provided, it is the dir of the binaryen tool 934# for this, and we are in wasm mode 935def eval_ctors(js_file, binary_file, binaryen_bin='', debug_info=False): 936 logger.debug('Ctor evalling in the wasm backend is disabled due to https://github.com/emscripten-core/emscripten/issues/9527') 937 return 938 # TODO re-enable 939 # cmd = [PYTHON, path_from_root('tools', 'ctor_evaller.py'), js_file, binary_file, str(Settings.INITIAL_MEMORY), str(Settings.TOTAL_STACK), str(Settings.GLOBAL_BASE), binaryen_bin, str(int(debug_info))] 940 # if binaryen_bin: 941 # cmd += get_binaryen_feature_flags() 942 # print_compiler_stage(cmd) 943 # check_call(cmd) 944 945 946def get_closure_compiler(): 947 # First check if the user configured a specific CLOSURE_COMPILER in thier settings 948 if shared.CLOSURE_COMPILER: 949 return shared.CLOSURE_COMPILER 950 951 # Otherwise use the one installed vai npm 952 cmd = shared.get_npm_cmd('google-closure-compiler') 953 if not WINDOWS: 954 # Work around an issue that Closure compiler can take up a lot of memory and crash in an error 955 # "FATAL ERROR: Ineffective mark-compacts near heap limit Allocation failed - JavaScript heap 956 # out of memory" 957 cmd.insert(-1, '--max_old_space_size=8192') 958 return cmd 959 960 961def check_closure_compiler(cmd, args, env): 962 try: 963 output = run_process(cmd + args + ['--version'], stdout=PIPE, env=env).stdout 964 except Exception as e: 965 logger.warn(str(e)) 966 exit_with_error('closure compiler ("%s --version") did not execute properly!' 
                    % str(cmd))

  if 'Version:' not in output:
    exit_with_error('unrecognized closure compiler --version output (%s):\n%s' % (str(cmd), output))


def closure_compiler(filename, pretty=True, advanced=True, extra_closure_args=None):
  """Run the Closure compiler over `filename` and return the minified output file.

  pretty: keep readable formatting; advanced: use ADVANCED_OPTIMIZATIONS;
  extra_closure_args: extra flags appended after EMCC_CLOSURE_ARGS env args.
  """
  with ToolchainProfiler.profile_block('closure_compiler'):
    env = shared.env_with_node_in_path()
    user_args = []
    env_args = os.environ.get('EMCC_CLOSURE_ARGS')
    if env_args:
      user_args += shlex.split(env_args)
    if extra_closure_args:
      user_args += extra_closure_args

    # Closure compiler expects JAVA_HOME to be set *and* java.exe to be in the PATH in order
    # to enable use the java backend.  Without this it will only try the native and JavaScript
    # versions of the compiler.
    java_bin = os.path.dirname(JAVA)
    if java_bin:
      def add_to_path(dirname):
        env['PATH'] = env['PATH'] + os.pathsep + dirname
      add_to_path(java_bin)
      java_home = os.path.dirname(java_bin)
      env.setdefault('JAVA_HOME', java_home)

    if WINDOWS and not any(a.startswith('--platform') for a in user_args):
      # Disable native compiler on windows until upstream issue is fixed:
      # https://github.com/google/closure-compiler-npm/issues/147
      user_args.append('--platform=java')

    closure_cmd = get_closure_compiler()
    check_closure_compiler(closure_cmd, user_args, env)

    # Closure externs file contains known symbols to be extern to the minification, Closure
    # should not minify these symbol names.
    CLOSURE_EXTERNS = [path_from_root('src', 'closure-externs', 'closure-externs.js')]

    # Closure compiler needs to know about all exports that come from the asm.js/wasm module, because to optimize for small code size,
    # the exported symbols are added to global scope via a foreach loop in a way that evades Closure's static analysis. With an explicit
    # externs file for the exports, Closure is able to reason about the exports.
    if Settings.MODULE_EXPORTS and not Settings.DECLARE_ASM_MODULE_EXPORTS:
      # Generate an exports file that records all the exported symbols from asm.js/wasm module.
      # MODULE_EXPORTS appears to hold (name, minified-name) pairs; only the first is used here.
      module_exports_suppressions = '\n'.join(['/**\n * @suppress {duplicate, undefinedVars}\n */\nvar %s;\n' % i for i, j in Settings.MODULE_EXPORTS])
      exports_file = configuration.get_temp_files().get('_module_exports.js')
      exports_file.write(module_exports_suppressions.encode())
      exports_file.close()

      CLOSURE_EXTERNS += [exports_file.name]

    # Node.js specific externs
    if Settings.target_environment_may_be('node'):
      NODE_EXTERNS_BASE = path_from_root('third_party', 'closure-compiler', 'node-externs')
      NODE_EXTERNS = os.listdir(NODE_EXTERNS_BASE)
      NODE_EXTERNS = [os.path.join(NODE_EXTERNS_BASE, name) for name in NODE_EXTERNS
                      if name.endswith('.js')]
      CLOSURE_EXTERNS += [path_from_root('src', 'closure-externs', 'node-externs.js')] + NODE_EXTERNS

    # V8/SpiderMonkey shell specific externs
    if Settings.target_environment_may_be('shell'):
      V8_EXTERNS = [path_from_root('src', 'closure-externs', 'v8-externs.js')]
      SPIDERMONKEY_EXTERNS = [path_from_root('src', 'closure-externs', 'spidermonkey-externs.js')]
      CLOSURE_EXTERNS += V8_EXTERNS + SPIDERMONKEY_EXTERNS

    # Web environment specific externs
    if Settings.target_environment_may_be('web') or Settings.target_environment_may_be('worker'):
      BROWSER_EXTERNS_BASE = path_from_root('src', 'closure-externs', 'browser-externs')
      if os.path.isdir(BROWSER_EXTERNS_BASE):
        BROWSER_EXTERNS = os.listdir(BROWSER_EXTERNS_BASE)
        BROWSER_EXTERNS = [os.path.join(BROWSER_EXTERNS_BASE, name) for name in BROWSER_EXTERNS
                           if name.endswith('.js')]
        CLOSURE_EXTERNS += BROWSER_EXTERNS

    # pthreads + MINIMAL_RUNTIME (without MODULARIZE) needs worker externs too.
    if Settings.MINIMAL_RUNTIME and Settings.USE_PTHREADS and not Settings.MODULARIZE:
      CLOSURE_EXTERNS += [path_from_root('src', 'minimal_runtime_worker_externs.js')]
    # Output goes next to the input, with a '.cc.js' suffix.
    outfile = filename + '.cc.js'
    configuration.get_temp_files().note(outfile)

    args = ['--compilation_level', 'ADVANCED_OPTIMIZATIONS' if advanced else 'SIMPLE_OPTIMIZATIONS']
    # Keep in sync with ecmaVersion in tools/acorn-optimizer.js
    args += ['--language_in', 'ECMASCRIPT_2018']
    # Tell closure not to do any transpiling or inject any polyfills.
    # At some point we may want to look into using this as way to convert to ES5 but
    # babel is perhaps a better tool for that.
    args += ['--language_out', 'NO_TRANSPILE']
    # Tell closure never to inject the 'use strict' directive.
    args += ['--emit_use_strict=false']

    for e in CLOSURE_EXTERNS:
      args += ['--externs', e]
    args += ['--js_output_file', outfile]

    if Settings.IGNORE_CLOSURE_COMPILER_ERRORS:
      args.append('--jscomp_off=*')
    if pretty:
      args += ['--formatting', 'PRETTY_PRINT']
    args += ['--js', filename]
    cmd = closure_cmd + args + user_args
    logger.debug('closure compiler: ' + ' '.join(cmd))

    # check=False: diagnostics are handled manually below.
    proc = run_process(cmd, stderr=PIPE, check=False, env=env)

    # XXX Closure bug: if Closure is invoked with --create_source_map, Closure should create a
    # outfile.map source map file (https://github.com/google/closure-compiler/wiki/Source-Maps)
    # But it looks like it creates such files on Linux(?) even without setting that command line
    # flag (and currently we don't), so delete the produced source map file to not leak files in
    # temp directory.
    try_delete(outfile + '.map')

    # Print Closure diagnostics result up front.
1077 if proc.returncode != 0: 1078 logger.error('Closure compiler run failed:\n') 1079 elif len(proc.stderr.strip()) > 0: 1080 if Settings.CLOSURE_WARNINGS == 'error': 1081 logger.error('Closure compiler completed with warnings and -s CLOSURE_WARNINGS=error enabled, aborting!\n') 1082 elif Settings.CLOSURE_WARNINGS == 'warn': 1083 logger.warn('Closure compiler completed with warnings:\n') 1084 1085 # Print input file (long wall of text!) 1086 if DEBUG == 2 and (proc.returncode != 0 or (len(proc.stderr.strip()) > 0 and Settings.CLOSURE_WARNINGS != 'quiet')): 1087 input_file = open(filename, 'r').read().splitlines() 1088 for i in range(len(input_file)): 1089 sys.stderr.write(str(i + 1) + ': ' + input_file[i] + '\n') 1090 1091 if proc.returncode != 0: 1092 logger.error(proc.stderr) # print list of errors (possibly long wall of text if input was minified) 1093 1094 # Exit and print final hint to get clearer output 1095 msg = 'closure compiler failed (rc: %d): %s' % (proc.returncode, shared.shlex_join(cmd)) 1096 if not pretty: 1097 msg += ' the error message may be clearer with -g1 and EMCC_DEBUG=2 set' 1098 exit_with_error(msg) 1099 1100 if len(proc.stderr.strip()) > 0 and Settings.CLOSURE_WARNINGS != 'quiet': 1101 # print list of warnings (possibly long wall of text if input was minified) 1102 if Settings.CLOSURE_WARNINGS == 'error': 1103 logger.error(proc.stderr) 1104 else: 1105 logger.warn(proc.stderr) 1106 1107 # Exit and/or print final hint to get clearer output 1108 if not pretty: 1109 logger.warn('(rerun with -g1 linker flag for an unminified output)') 1110 elif DEBUG != 2: 1111 logger.warn('(rerun with EMCC_DEBUG=2 enabled to dump Closure input file)') 1112 1113 if Settings.CLOSURE_WARNINGS == 'error': 1114 exit_with_error('closure compiler produced warnings and -s CLOSURE_WARNINGS=error enabled') 1115 1116 return outfile 1117 1118 1119# minify the final wasm+JS combination. 
# this is done after all the JS
# and wasm optimizations; here we do the very final optimizations on them
def minify_wasm_js(js_file, wasm_file, expensive_optimizations, minify_whitespace, debug_info):
  """Run the final cross-cutting JS+wasm minification, returning the new JS file."""
  # start with JSDCE, to clean up obvious JS garbage. When optimizing for size,
  # use AJSDCE (aggressive JS DCE, performs multiple iterations). Clean up
  # whitespace if necessary too.
  passes = []
  if not Settings.LINKABLE:
    passes.append('JSDCE' if not expensive_optimizations else 'AJSDCE')
  if minify_whitespace:
    passes.append('minifyWhitespace')
  if passes:
    logger.debug('running cleanup on shell code: ' + ' '.join(passes))
    js_file = acorn_optimizer(js_file, passes)
  # if we can optimize this js+wasm combination under the assumption no one else
  # will see the internals, do so
  if not Settings.LINKABLE:
    # if we are optimizing for size, shrink the combined wasm+JS
    # TODO: support this when a symbol map is used
    if expensive_optimizations:
      js_file = metadce(js_file, wasm_file, minify_whitespace=minify_whitespace, debug_info=debug_info)
      # now that we removed unneeded communication between js and wasm, we can clean up
      # the js some more.
      passes = ['AJSDCE']
      if minify_whitespace:
        passes.append('minifyWhitespace')
      logger.debug('running post-meta-DCE cleanup on shell code: ' + ' '.join(passes))
      js_file = acorn_optimizer(js_file, passes)
    if Settings.MINIFY_WASM_IMPORTS_AND_EXPORTS:
      js_file = minify_wasm_imports_and_exports(js_file, wasm_file, minify_whitespace=minify_whitespace, minify_exports=Settings.MINIFY_ASMJS_EXPORT_NAMES, debug_info=debug_info)
  return js_file


# run binaryen's wasm-metadce to dce both js and wasm
def metadce(js_file, wasm_file, minify_whitespace, debug_info):
  """Dead-code-eliminate across the JS/wasm boundary via wasm-metadce.

  Builds a reachability graph from the JS, feeds it to wasm-metadce (which
  rewrites the wasm in place), then removes the now-unused JS. Returns the
  new JS filename.
  """
  logger.debug('running meta-DCE')
  temp_files = configuration.get_temp_files()
  # first, get the JS part of the graph
  extra_info = '{ "exports": [' + ','.join(map(lambda x: '["' + x[0] + '","' + x[1] + '"]', Settings.MODULE_EXPORTS)) + ']}'
  txt = acorn_optimizer(js_file, ['emitDCEGraph', 'noPrint'], return_output=True, extra_info=extra_info)
  graph = json.loads(txt)
  # add exports based on the backend output, that are not present in the JS
  if not Settings.DECLARE_ASM_MODULE_EXPORTS:
    exports = set()
    for item in graph:
      if 'export' in item:
        exports.add(item['export'])
    for export, unminified in Settings.MODULE_EXPORTS:
      if export not in exports:
        graph.append({
          'export': export,
          'name': 'emcc$export$' + export,
          'reaches': []
        })
  # ensure that functions expected to be exported to the outside are roots
  for item in graph:
    if 'export' in item:
      export = item['export']
      # wasm backend's exports are prefixed differently inside the wasm
      export = asmjs_mangle(export)
      if export in user_requested_exports or Settings.EXPORT_ALL:
        item['root'] = True
  # in standalone wasm, always export the memory
  if Settings.STANDALONE_WASM:
    graph.append({
      'export': 'memory',
      'name': 'emcc$export$memory',
      'reaches': [],
      'root': True
    })
    # the function table is likewise always kept alive in standalone mode
    graph.append({
      'export': '__indirect_function_table',
      'name': 'emcc$export$__indirect_function_table',
      'reaches': [],
      'root': True
    })
  # fix wasi imports TODO: support wasm stable with an option?
  WASI_IMPORTS = set([
    'environ_get',
    'environ_sizes_get',
    'args_get',
    'args_sizes_get',
    'fd_write',
    'fd_close',
    'fd_read',
    'fd_seek',
    'fd_fdstat_get',
    'fd_sync',
    'proc_exit',
    'clock_res_get',
    'clock_time_get',
  ])
  for item in graph:
    if 'import' in item and item['import'][1][1:] in WASI_IMPORTS:
      item['import'][0] = Settings.WASI_MODULE_NAME
  # fixup wasm backend prefixing
  for item in graph:
    if 'import' in item:
      if item['import'][1][0] == '_':
        item['import'][1] = item['import'][1][1:]
  # map import names from wasm to JS, using the actual name the wasm uses for the import
  import_name_map = {}
  for item in graph:
    if 'import' in item:
      import_name_map[item['name']] = 'emcc$import$' + item['import'][1]
  temp = temp_files.get('.txt').name
  txt = json.dumps(graph)
  with open(temp, 'w') as f:
    f.write(txt)
  # run wasm-metadce
  out = run_binaryen_command('wasm-metadce',
                             wasm_file,
                             wasm_file,
                             ['--graph-file=' + temp],
                             debug=debug_info,
                             stdout=PIPE)
  # find the unused things in js
  unused = []
  PREFIX = 'unused: '
  for line in out.splitlines():
    if line.startswith(PREFIX):
      name = line.replace(PREFIX, '').strip()
      if name in import_name_map:
        name = import_name_map[name]
      unused.append(name)
  # remove them
  passes = ['applyDCEGraphRemovals']
  if minify_whitespace:
    passes.append('minifyWhitespace')
  extra_info = {'unused': unused}
  return acorn_optimizer(js_file, passes, extra_info=json.dumps(extra_info))


def asyncify_lazy_load_code(wasm_binary_target, debug):
  # create the lazy-loaded wasm.
  # remove the memory segments from it, as memory
  # segments have already been applied by the initial wasm, and apply the knowledge
  # that it will only rewind, after which optimizations can remove some code
  args = ['--remove-memory', '--mod-asyncify-never-unwind']
  if Settings.OPT_LEVEL > 0:
    args.append(opt_level_to_str(Settings.OPT_LEVEL, Settings.SHRINK_LEVEL))
  run_wasm_opt(wasm_binary_target,
               wasm_binary_target + '.lazy.wasm',
               args=args,
               debug=debug)
  # re-optimize the original, by applying the knowledge that imports will
  # definitely unwind, and we never rewind, after which optimizations can remove
  # a lot of code
  # TODO: support other asyncify stuff, imports that don't always unwind?
  # TODO: source maps etc.
  args = ['--mod-asyncify-always-and-only-unwind']
  if Settings.OPT_LEVEL > 0:
    args.append(opt_level_to_str(Settings.OPT_LEVEL, Settings.SHRINK_LEVEL))
  run_wasm_opt(infile=wasm_binary_target,
               outfile=wasm_binary_target,
               args=args,
               debug=debug)


def minify_wasm_imports_and_exports(js_file, wasm_file, minify_whitespace, minify_exports, debug_info):
  """Minify the wasm's import/export names and apply the same renames to the JS."""
  logger.debug('minifying wasm imports and exports')
  # run the pass
  if minify_exports:
    # standalone wasm mode means we need to emit a wasi import module.
    # otherwise, minify even the imported module names.
    if Settings.MINIFY_WASM_IMPORTED_MODULES:
      pass_name = '--minify-imports-and-exports-and-modules'
    else:
      pass_name = '--minify-imports-and-exports'
  else:
    pass_name = '--minify-imports'
  out = run_wasm_opt(wasm_file, wasm_file,
                     [pass_name],
                     debug=debug_info,
                     stdout=PIPE)
  # TODO this is the last tool we run, after normal opts and metadce. it
  # might make sense to run Stack IR optimizations here or even -O (as
  # metadce which runs before us might open up new general optimization
  # opportunities).
  # however, the benefit is less than 0.5%.

  # get the mapping (wasm-opt prints lines of the form 'old => new')
  SEP = ' => '
  mapping = {}
  for line in out.split('\n'):
    if SEP in line:
      old, new = line.strip().split(SEP)
      assert old not in mapping, 'imports must be unique'
      mapping[old] = new
  # apply them
  passes = ['applyImportAndExportNameChanges']
  if minify_whitespace:
    passes.append('minifyWhitespace')
  extra_info = {'mapping': mapping}
  return acorn_optimizer(js_file, passes, extra_info=json.dumps(extra_info))


def wasm2js(js_file, wasm_file, opt_level, minify_whitespace, use_closure_compiler, debug_info, symbols_file=None):
  """Compile the wasm to JS with binaryen's wasm2js and splice it into `js_file`.

  Returns the name of the new combined JS file.
  """
  logger.debug('wasm2js')
  args = ['--emscripten']
  if opt_level > 0:
    args += ['-O']
  if symbols_file:
    args += ['--symbols-file=%s' % symbols_file]
  wasm2js_js = run_binaryen_command('wasm2js', wasm_file,
                                    args=args,
                                    debug=debug_info,
                                    stdout=PIPE)
  if DEBUG:
    with open(os.path.join(get_emscripten_temp_dir(), 'wasm2js-output.js'), 'w') as f:
      f.write(wasm2js_js)
  # JS optimizations
  if opt_level >= 2:
    passes = []
    # it may be useful to also run: simplifyIfs, registerize, asmLastOpts
    # passes += ['simplifyExpressions'] # XXX fails on wasm3js.test_sqlite
    # TODO: enable name minification with pthreads.
    # atm wasm2js emits pthread
    # helper functions outside of the asmFunc(), and they mix up minifyGlobals
    # (which assumes any vars in that area are global, like var HEAP8, but
    # those helpers have internal vars in a scope it doesn't understand yet)
    if not debug_info and not Settings.USE_PTHREADS:
      passes += ['minifyNames']
    if minify_whitespace:
      passes += ['minifyWhitespace']
    passes += ['last']
    if passes:
      # hackish fixups to work around wasm2js style and the js optimizer FIXME
      # wrap the output in the ASM markers the optimizer expects, and normalize
      # some formatting quirks of wasm2js output
      wasm2js_js = '// EMSCRIPTEN_START_ASM\n' + wasm2js_js + '// EMSCRIPTEN_END_ASM\n'
      wasm2js_js = wasm2js_js.replace('// EMSCRIPTEN_START_FUNCS;\n', '// EMSCRIPTEN_START_FUNCS\n')
      wasm2js_js = wasm2js_js.replace('// EMSCRIPTEN_END_FUNCS;\n', '// EMSCRIPTEN_END_FUNCS\n')
      wasm2js_js = wasm2js_js.replace('\n function $', '\nfunction $')
      wasm2js_js = wasm2js_js.replace('\n }', '\n}')
      wasm2js_js += '\n// EMSCRIPTEN_GENERATED_FUNCTIONS\n'
      temp = configuration.get_temp_files().get('.js').name
      with open(temp, 'w') as f:
        f.write(wasm2js_js)
      temp = js_optimizer(temp, passes)
      with open(temp) as f:
        wasm2js_js = f.read()
  # Closure compiler: in mode 1, we just minify the shell. In mode 2, we
  # minify the wasm2js output as well, which is ok since it isn't
  # validating asm.js.
  # TODO: in the non-closure case, we could run a lightweight general-
  # purpose JS minifier here.
1361 if use_closure_compiler == 2: 1362 temp = configuration.get_temp_files().get('.js').name 1363 with open(temp, 'a') as f: 1364 f.write(wasm2js_js) 1365 temp = closure_compiler(temp, pretty=not minify_whitespace, advanced=False) 1366 with open(temp) as f: 1367 wasm2js_js = f.read() 1368 # closure may leave a trailing `;`, which would be invalid given where we place 1369 # this code (inside parens) 1370 wasm2js_js = wasm2js_js.strip() 1371 if wasm2js_js[-1] == ';': 1372 wasm2js_js = wasm2js_js[:-1] 1373 with open(js_file) as f: 1374 all_js = f.read() 1375 # quoted notation, something like Module['__wasm2jsInstantiate__'] 1376 finds = re.findall(r'''[\w\d_$]+\[['"]__wasm2jsInstantiate__['"]\]''', all_js) 1377 if not finds: 1378 # post-closure notation, something like a.__wasm2jsInstantiate__ 1379 finds = re.findall(r'''[\w\d_$]+\.__wasm2jsInstantiate__''', all_js) 1380 assert len(finds) == 1 1381 marker = finds[0] 1382 all_js = all_js.replace(marker, '(\n' + wasm2js_js + '\n)') 1383 # replace the placeholder with the actual code 1384 js_file = js_file + '.wasm2js.js' 1385 with open(js_file, 'w') as f: 1386 f.write(all_js) 1387 return js_file 1388 1389 1390def strip(infile, outfile, debug=False, producers=False): 1391 cmd = [LLVM_OBJCOPY, infile, outfile] 1392 if debug: 1393 cmd += ['--remove-section=.debug*'] 1394 if producers: 1395 cmd += ['--remove-section=producers'] 1396 run_process(cmd) 1397 1398 1399# extract the DWARF info from the main file, and leave the wasm with 1400# debug into as a file on the side 1401# TODO: emit only debug sections in the side file, and not the entire 1402# wasm as well 1403def emit_debug_on_side(wasm_file, wasm_file_with_dwarf): 1404 # if the dwarf filename wasn't provided, use the default target + a suffix 1405 wasm_file_with_dwarf = shared.Settings.SEPARATE_DWARF 1406 if wasm_file_with_dwarf is True: 1407 wasm_file_with_dwarf = wasm_file + '.debug.wasm' 1408 embedded_path = shared.Settings.SEPARATE_DWARF_URL or 
      wasm_file_with_dwarf

  shutil.move(wasm_file, wasm_file_with_dwarf)
  strip(wasm_file_with_dwarf, wasm_file, debug=True)

  # embed a section in the main wasm to point to the file with external DWARF,
  # see https://yurydelendik.github.io/webassembly-dwarf/#external-DWARF
  section_name = b'\x13external_debug_info' # section name, including prefixed size
  filename_bytes = asbytes(embedded_path)
  contents = WebAssembly.toLEB(len(filename_bytes)) + filename_bytes
  section_size = len(section_name) + len(contents)
  with open(wasm_file, 'ab') as f:
    f.write(b'\0') # user section is code 0
    f.write(WebAssembly.toLEB(section_size))
    f.write(section_name)
    f.write(contents)


def apply_wasm_memory_growth(js_file):
  """Prepend the growableHeap support code and run the growableHeap acorn pass."""
  logger.debug('supporting wasm memory growth with pthreads')
  fixed = acorn_optimizer(js_file, ['growableHeap'])
  ret = js_file + '.pgrow.js'
  with open(fixed, 'r') as fixed_f:
    with open(ret, 'w') as ret_f:
      with open(path_from_root('src', 'growableHeap.js')) as support_code_f:
        ret_f.write(support_code_f.read() + '\n' + fixed_f.read())
  return ret


def use_unsigned_pointers_in_js(js_file):
  """Run the unsignPointers acorn pass; returns the new JS filename."""
  logger.debug('using unsigned pointers in JS')
  return acorn_optimizer(js_file, ['unsignPointers'])


def instrument_js_for_asan(js_file):
  """Run the asanify acorn pass; returns the new JS filename."""
  logger.debug('instrumenting JS memory accesses for ASan')
  return acorn_optimizer(js_file, ['asanify'])


def handle_final_wasm_symbols(wasm_file, symbols_file, debug_info):
  """Optionally emit a symbols file and/or strip debug info from the final wasm."""
  logger.debug('handle_final_wasm_symbols')
  args = []
  if symbols_file:
    args += ['--print-function-map']
  if not debug_info:
    # to remove debug info, we just write to that same file, and without -g
    args += ['-o', wasm_file]
  # ignore stderr because if wasm-opt is run without a -o it will warn
  output = run_wasm_opt(wasm_file, args=args, stdout=PIPE)
  if symbols_file:
with open(symbols_file, 'w') as f: 1459 f.write(output) 1460 1461 1462def is_ar(filename): 1463 try: 1464 if _is_ar_cache.get(filename): 1465 return _is_ar_cache[filename] 1466 header = open(filename, 'rb').read(8) 1467 sigcheck = header == b'!<arch>\n' 1468 _is_ar_cache[filename] = sigcheck 1469 return sigcheck 1470 except Exception as e: 1471 logger.debug('is_ar failed to test whether file \'%s\' is a llvm archive file! Failed on exception: %s' % (filename, e)) 1472 return False 1473 1474 1475def is_bitcode(filename): 1476 try: 1477 # look for magic signature 1478 b = open(filename, 'rb').read(4) 1479 if b[:2] == b'BC': 1480 return True 1481 # on macOS, there is a 20-byte prefix which starts with little endian 1482 # encoding of 0x0B17C0DE 1483 elif b == b'\xDE\xC0\x17\x0B': 1484 b = bytearray(open(filename, 'rb').read(22)) 1485 return b[20:] == b'BC' 1486 except IndexError: 1487 # not enough characters in the input 1488 # note that logging will be done on the caller function 1489 pass 1490 return False 1491 1492 1493def is_wasm(filename): 1494 magic = open(filename, 'rb').read(4) 1495 return magic == b'\0asm' 1496 1497 1498# Given the name of a special Emscripten-implemented system library, returns an 1499# array of absolute paths to JS library files inside emscripten/src/ that 1500# corresponds to the library name. 1501def path_to_system_js_libraries(library_name): 1502 # Some native libraries are implemented in Emscripten as system side JS libraries 1503 js_system_libraries = { 1504 'c': '', 1505 'dl': '', 1506 'EGL': 'library_egl.js', 1507 'GL': ['library_webgl.js', 'library_html5_webgl.js'], 1508 'webgl.js': ['library_webgl.js', 'library_html5_webgl.js'], 1509 'GLESv2': 'library_webgl.js', 1510 # N.b. 
there is no GLESv3 to link to (note [f] in https://www.khronos.org/registry/implementers_guide.html) 1511 'GLEW': 'library_glew.js', 1512 'glfw': 'library_glfw.js', 1513 'glfw3': 'library_glfw.js', 1514 'GLU': '', 1515 'glut': 'library_glut.js', 1516 'm': '', 1517 'openal': 'library_openal.js', 1518 'rt': '', 1519 'pthread': '', 1520 'X11': 'library_xlib.js', 1521 'SDL': 'library_sdl.js', 1522 'stdc++': '', 1523 'uuid': 'library_uuid.js', 1524 'websocket': 'library_websocket.js' 1525 } 1526 library_files = [] 1527 if library_name in js_system_libraries: 1528 if len(js_system_libraries[library_name]): 1529 lib = js_system_libraries[library_name] if isinstance(js_system_libraries[library_name], list) else [js_system_libraries[library_name]] 1530 library_files += lib 1531 logger.debug('Linking in JS library ' + str(lib)) 1532 1533 elif library_name.endswith('.js') and os.path.isfile(path_from_root('src', 'library_' + library_name)): 1534 library_files += ['library_' + library_name] 1535 1536 return library_files 1537 1538 1539def emit_wasm_source_map(wasm_file, map_file): 1540 # source file paths must be relative to the location of the map (which is 1541 # emitted alongside the wasm) 1542 base_path = os.path.dirname(os.path.abspath(Settings.WASM_BINARY_FILE)) 1543 sourcemap_cmd = [PYTHON, path_from_root('tools', 'wasm-sourcemap.py'), 1544 wasm_file, 1545 '--dwarfdump=' + LLVM_DWARFDUMP, 1546 '-o', map_file, 1547 '--basepath=' + base_path] 1548 check_call(sourcemap_cmd) 1549 1550 1551def get_binaryen_feature_flags(): 1552 # start with the MVP features, add the rest as needed 1553 ret = ['--mvp-features'] 1554 if Settings.USE_PTHREADS: 1555 ret += ['--enable-threads'] 1556 ret += Settings.BINARYEN_FEATURES 1557 return ret 1558 1559 1560def check_binaryen(bindir): 1561 opt = os.path.join(bindir, exe_suffix('wasm-opt')) 1562 if not os.path.exists(opt): 1563 exit_with_error('binaryen executable not found (%s). 
Please check your binaryen installation' % opt) 1564 try: 1565 output = run_process([opt, '--version'], stdout=PIPE).stdout 1566 except subprocess.CalledProcessError: 1567 exit_with_error('error running binaryen executable (%s). Please check your binaryen installation' % opt) 1568 if output: 1569 output = output.splitlines()[0] 1570 try: 1571 version = output.split()[2] 1572 version = int(version) 1573 except (IndexError, ValueError): 1574 exit_with_error('error parsing binaryen version (%s). Please check your binaryen installation (%s)' % (output, opt)) 1575 1576 # Allow the expected version or the following one in order avoid needing to update both 1577 # emscripten and binaryen in lock step in emscripten-releases. 1578 if version not in (EXPECTED_BINARYEN_VERSION, EXPECTED_BINARYEN_VERSION + 1): 1579 diagnostics.warning('version-check', 'unexpected binaryen version: %s (expected %s)', version, EXPECTED_BINARYEN_VERSION) 1580 1581 1582def get_binaryen_bin(): 1583 assert Settings.WASM, 'non wasm builds should not ask for binaryen' 1584 global binaryen_checked 1585 rtn = os.path.join(BINARYEN_ROOT, 'bin') 1586 if not binaryen_checked: 1587 check_binaryen(rtn) 1588 binaryen_checked = True 1589 return rtn 1590 1591 1592def run_binaryen_command(tool, infile, outfile=None, args=[], debug=False, stdout=None): 1593 cmd = [os.path.join(get_binaryen_bin(), tool)] 1594 if outfile and tool == 'wasm-opt' and Settings.DEBUG_LEVEL != 3: 1595 # remove any dwarf debug info sections, if the debug level is <3, as 1596 # we don't need them; also remove them if we the level is 4, as then we 1597 # want a source map, which is implemented separately from dwarf. 1598 # note that we add this pass first, so that it doesn't interfere with 1599 # the final set of passes (which may generate stack IR, and nothing 1600 # should be run after that) 1601 # TODO: if lld can strip dwarf then we don't need this. 
    # atm though it can
    # only strip all debug info or none, which includes the name section
    # which we may need
    # TODO: once fastcomp is gone, either remove source maps entirely, or
    # support them by emitting a source map at the end from the dwarf,
    # and use llvm-objcopy to remove that final dwarf
    cmd += ['--strip-dwarf']
  cmd += args
  if infile:
    cmd += [infile]
  if outfile:
    cmd += ['-o', outfile]
  if debug:
    cmd += ['-g'] # preserve the debug info
  # if the features are not already handled, handle them
  if '--detect-features' not in cmd:
    cmd += get_binaryen_feature_flags()
  print_compiler_stage(cmd)
  # if we are emitting a source map, every time we load and save the wasm
  # we must tell binaryen to update it
  emit_source_map = Settings.DEBUG_LEVEL == 4 and outfile
  if emit_source_map:
    cmd += ['--input-source-map=' + infile + '.map']
    cmd += ['--output-source-map=' + outfile + '.map']
  ret = check_call(cmd, stdout=stdout).stdout
  if outfile:
    save_intermediate(outfile, '%s.wasm' % tool)
  return ret


def run_wasm_opt(*args, **kwargs):
  """Convenience wrapper: run_binaryen_command with the wasm-opt tool."""
  return run_binaryen_command('wasm-opt', *args, **kwargs)


# monotonically increasing index used to order saved debug copies
save_intermediate_counter = 0


def save_intermediate(src, dst):
  """In DEBUG mode, copy `src` into the canonical temp dir as a numbered debug artifact."""
  if DEBUG:
    global save_intermediate_counter
    dst = 'emcc-%d-%s' % (save_intermediate_counter, dst)
    save_intermediate_counter += 1
    dst = os.path.join(CANONICAL_TEMP_DIR, dst)
    logger.debug('saving debug copy %s' % dst)
    shutil.copyfile(src, dst)