1# Copyright 2020 The Emscripten Authors.  All rights reserved.
2# Emscripten is available under two separate licenses, the MIT license and the
3# University of Illinois/NCSA Open Source License.  Both these licenses can be
4# found in the LICENSE file.
5
6from __future__ import print_function
7
8import atexit
9import json
10import logging
11import multiprocessing
12import os
13import re
14import shlex
15import shutil
16import subprocess
17import sys
18import tempfile
19from subprocess import STDOUT, PIPE
20
21from . import diagnostics
22from . import response_file
23from . import shared
24from .toolchain_profiler import ToolchainProfiler
25from .shared import Settings, CLANG_CC, CLANG_CXX, PYTHON
26from .shared import LLVM_NM, EMCC, EMAR, EMXX, EMRANLIB, NODE_JS, WASM_LD, LLVM_AR
27from .shared import LLVM_OPT, LLVM_LINK, LLVM_DIS, LLVM_AS, LLVM_OBJCOPY
28from .shared import try_delete, run_process, check_call, exit_with_error
29from .shared import configuration, path_from_root, EXPECTED_BINARYEN_VERSION
30from .shared import asmjs_mangle, DEBUG, WINDOWS, JAVA
31from .shared import EM_BUILD_VERBOSE, TEMP_DIR, print_compiler_stage, BINARYEN_ROOT
32from .shared import CANONICAL_TEMP_DIR, LLVM_DWARFDUMP, demangle_c_symbol_name, asbytes
33from .shared import get_emscripten_temp_dir, exe_suffix, WebAssembly, which, is_c_symbol
34
35logger = logging.getLogger('building')
36
37#  Building
38multiprocessing_pool = None
39binaryen_checked = False
40
41# internal caches
42internal_nm_cache = {}
43# cache results of nm - it can be slow to run
44uninternal_nm_cache = {}
45# Stores the object files contained in different archive files passed as input
46ar_contents = {}
47_is_ar_cache = {}
48# the exports the user requested
49user_requested_exports = []
50
51
52class ObjectFileInfo(object):
53  def __init__(self, returncode, output, defs=set(), undefs=set(), commons=set()):
54    self.returncode = returncode
55    self.output = output
56    self.defs = defs
57    self.undefs = undefs
58    self.commons = commons
59
60  def is_valid_for_nm(self):
61    return self.returncode == 0
62
63
64# llvm-ar appears to just use basenames inside archives. as a result, files
65# with the same basename will trample each other when we extract them. to help
66# warn of such situations, we warn if there are duplicate entries in the
67# archive
68def warn_if_duplicate_entries(archive_contents, archive_filename):
69  if len(archive_contents) != len(set(archive_contents)):
70    msg = '%s: archive file contains duplicate entries. This is not supported by emscripten. Only the last member with a given name will be linked in which can result in undefined symbols. You should either rename your source files, or use `emar` to create you archives which works around this issue.' % archive_filename
71    warned = set()
72    for i in range(len(archive_contents)):
73      curr = archive_contents[i]
74      if curr not in warned and curr in archive_contents[i + 1:]:
75        msg += '\n   duplicate: %s' % curr
76        warned.add(curr)
77    diagnostics.warning('emcc', msg)
78
79
80# This function creates a temporary directory specified by the 'dir' field in
81# the returned dictionary. Caller is responsible for cleaning up those files
82# after done.
83def extract_archive_contents(archive_file):
84  lines = run_process([LLVM_AR, 't', archive_file], stdout=PIPE).stdout.splitlines()
85  # ignore empty lines
86  contents = [l for l in lines if len(l)]
87  if len(contents) == 0:
88    logger.debug('Archive %s appears to be empty (recommendation: link an .so instead of .a)' % archive_file)
89    return {
90      'returncode': 0,
91      'dir': None,
92      'files': []
93    }
94
95  # `ar` files can only contains filenames. Just to be sure,  verify that each
96  # file has only as filename component and is not absolute
97  for f in contents:
98    assert not os.path.dirname(f)
99    assert not os.path.isabs(f)
100
101  warn_if_duplicate_entries(contents, archive_file)
102
103  # create temp dir
104  temp_dir = tempfile.mkdtemp('_archive_contents', 'emscripten_temp_')
105
106  # extract file in temp dir
107  proc = run_process([LLVM_AR, 'xo', archive_file], stdout=PIPE, stderr=STDOUT, cwd=temp_dir)
108  abs_contents = [os.path.join(temp_dir, c) for c in contents]
109
110  # check that all files were created
111  missing_contents = [x for x in abs_contents if not os.path.exists(x)]
112  if missing_contents:
113    exit_with_error('llvm-ar failed to extract file(s) ' + str(missing_contents) + ' from archive file ' + f + '! Error:' + str(proc.stdout))
114
115  return {
116    'returncode': proc.returncode,
117    'dir': temp_dir,
118    'files': abs_contents
119  }
120
121
122# Due to a python pickling issue, the following two functions must be at top
123# level, or multiprocessing pool spawn won't find them.
124def g_llvm_nm_uncached(filename):
125  return llvm_nm_uncached(filename)
126
127
128def g_multiprocessing_initializer(*args):
129  for item in args:
130    (key, value) = item.split('=', 1)
131    if key == 'EMCC_POOL_CWD':
132      os.chdir(value)
133    else:
134      os.environ[key] = value
135
136
137def unique_ordered(values):
138  """return a list of unique values in an input list, without changing order
139  (list(set(.)) would change order randomly).
140  """
141  seen = set()
142
143  def check(value):
144    if value in seen:
145      return False
146    seen.add(value)
147    return True
148
149  return list(filter(check, values))
150
151
152# clear internal caches. this is not normally needed, except if the clang/LLVM
153# used changes inside this invocation of Building, which can happen in the benchmarker
154# when it compares different builds.
155def clear():
156  internal_nm_cache.clear()
157  uninternal_nm_cache.clear()
158  ar_contents.clear()
159  _is_ar_cache.clear()
160
161
162def get_num_cores():
163  return int(os.environ.get('EMCC_CORES', multiprocessing.cpu_count()))
164
165
166# Multiprocessing pools are very slow to build up and tear down, and having
167# several pools throughout the application has a problem of overallocating
168# child processes. Therefore maintain a single centralized pool that is shared
169# between all pooled task invocations.
170def get_multiprocessing_pool():
171  global multiprocessing_pool
172  if not multiprocessing_pool:
173    cores = get_num_cores()
174    if DEBUG:
175      # When in EMCC_DEBUG mode, only use a single core in the pool, so that
176      # logging is not all jumbled up.
177      cores = 1
178
179    # If running with one core only, create a mock instance of a pool that does not
180    # actually spawn any new subprocesses. Very useful for internal debugging.
181    if cores == 1:
182      class FakeMultiprocessor(object):
183        def map(self, func, tasks, *args, **kwargs):
184          results = []
185          for t in tasks:
186            results += [func(t)]
187          return results
188
189        def map_async(self, func, tasks, *args, **kwargs):
190          class Result:
191            def __init__(self, func, tasks):
192              self.func = func
193              self.tasks = tasks
194
195            def get(self, timeout):
196              results = []
197              for t in tasks:
198                results += [func(t)]
199              return results
200
201          return Result(func, tasks)
202
203      multiprocessing_pool = FakeMultiprocessor()
204    else:
205      child_env = [
206        # Multiprocessing pool children must have their current working
207        # directory set to a safe path that is guaranteed not to die in
208        # between of executing commands, or otherwise the pool children will
209        # have trouble spawning subprocesses of their own.
210        'EMCC_POOL_CWD=' + path_from_root(),
211        # Multiprocessing pool children can't spawn their own linear number of
212        # children, that could cause a quadratic amount of spawned processes.
213        'EMCC_CORES=1'
214      ]
215      multiprocessing_pool = multiprocessing.Pool(processes=cores, initializer=g_multiprocessing_initializer, initargs=child_env)
216
217      def close_multiprocessing_pool():
218        global multiprocessing_pool
219        try:
220          # Shut down the pool explicitly, because leaving that for Python to do at process shutdown is buggy and can generate
221          # noisy "WindowsError: [Error 5] Access is denied" spam which is not fatal.
222          multiprocessing_pool.terminate()
223          multiprocessing_pool.join()
224          multiprocessing_pool = None
225        except OSError as e:
226          # Mute the "WindowsError: [Error 5] Access is denied" errors, raise all others through
227          if not (sys.platform.startswith('win') and isinstance(e, WindowsError) and e.winerror == 5):
228            raise
229      atexit.register(close_multiprocessing_pool)
230
231  return multiprocessing_pool
232
233
234# .. but for Popen, we cannot have doublequotes, so provide functionality to
235# remove them when needed.
236def remove_quotes(arg):
237  if isinstance(arg, list):
238    return [remove_quotes(a) for a in arg]
239
240  if arg.startswith('"') and arg.endswith('"'):
241    return arg[1:-1].replace('\\"', '"')
242  elif arg.startswith("'") and arg.endswith("'"):
243    return arg[1:-1].replace("\\'", "'")
244  else:
245    return arg
246
247
248def get_building_env(cflags=[]):
249  env = os.environ.copy()
250  # point CC etc. to the em* tools.
251  env['CC'] = EMCC
252  env['CXX'] = EMXX
253  env['AR'] = EMAR
254  env['LD'] = EMCC
255  env['NM'] = LLVM_NM
256  env['LDSHARED'] = EMCC
257  env['RANLIB'] = EMRANLIB
258  env['EMSCRIPTEN_TOOLS'] = path_from_root('tools')
259  if cflags:
260    env['CFLAGS'] = env['EMMAKEN_CFLAGS'] = ' '.join(cflags)
261  env['HOST_CC'] = CLANG_CC
262  env['HOST_CXX'] = CLANG_CXX
263  env['HOST_CFLAGS'] = "-W" # if set to nothing, CFLAGS is used, which we don't want
264  env['HOST_CXXFLAGS'] = "-W" # if set to nothing, CXXFLAGS is used, which we don't want
265  env['PKG_CONFIG_LIBDIR'] = path_from_root('system', 'local', 'lib', 'pkgconfig') + os.path.pathsep + path_from_root('system', 'lib', 'pkgconfig')
266  env['PKG_CONFIG_PATH'] = os.environ.get('EM_PKG_CONFIG_PATH', '')
267  env['EMSCRIPTEN'] = path_from_root()
268  env['PATH'] = path_from_root('system', 'bin') + os.pathsep + env['PATH']
269  env['CROSS_COMPILE'] = path_from_root('em') # produces /path/to/emscripten/em , which then can have 'cc', 'ar', etc appended to it
270  return env
271
272
273# Returns a clone of the given environment with all directories that contain
274# sh.exe removed from the PATH.  Used to work around CMake limitation with
275# MinGW Makefiles, where sh.exe is not allowed to be present.
276def remove_sh_exe_from_path(env):
277  env = env.copy()
278  if not WINDOWS:
279    return env
280  path = env['PATH'].split(';')
281  path = [p for p in path if not os.path.exists(os.path.join(p, 'sh.exe'))]
282  env['PATH'] = ';'.join(path)
283  return env
284
285
286def handle_cmake_toolchain(args, env):
287  def has_substr(args, substr):
288    return any(substr in s for s in args)
289
290  # Append the Emscripten toolchain file if the user didn't specify one.
291  if not has_substr(args, '-DCMAKE_TOOLCHAIN_FILE'):
292    args.append('-DCMAKE_TOOLCHAIN_FILE=' + path_from_root('cmake', 'Modules', 'Platform', 'Emscripten.cmake'))
293  node_js = NODE_JS
294
295  if not has_substr(args, '-DCMAKE_CROSSCOMPILING_EMULATOR'):
296    node_js = NODE_JS[0].replace('"', '\"')
297    args.append('-DCMAKE_CROSSCOMPILING_EMULATOR="%s"' % node_js)
298
299  # On Windows specify MinGW Makefiles or ninja if we have them and no other
300  # toolchain was specified, to keep CMake from pulling in a native Visual
301  # Studio, or Unix Makefiles.
302  if WINDOWS and '-G' not in args:
303    if which('mingw32-make'):
304      args += ['-G', 'MinGW Makefiles']
305    elif which('ninja'):
306      args += ['-G', 'Ninja']
307
308  # CMake has a requirement that it wants sh.exe off PATH if MinGW Makefiles
309  # is being used. This happens quite often, so do this automatically on
310  # behalf of the user. See
311  # http://www.cmake.org/Wiki/CMake_MinGW_Compiler_Issues
312  if WINDOWS and 'MinGW Makefiles' in args:
313    env = remove_sh_exe_from_path(env)
314
315  return (args, env)
316
317
318def configure(args, stdout=None, stderr=None, env=None, cflags=[], **kwargs):
319  if env:
320    env = env.copy()
321  else:
322    env = get_building_env(cflags=cflags)
323  if 'cmake' in args[0]:
324    # Note: EMMAKEN_JUST_CONFIGURE shall not be enabled when configuring with
325    #       CMake. This is because CMake does expect to be able to do
326    #       config-time builds with emcc.
327    args, env = handle_cmake_toolchain(args, env)
328  else:
329    # When we configure via a ./configure script, don't do config-time
330    # compilation with emcc, but instead do builds natively with Clang. This
331    # is a heuristic emulation that may or may not work.
332    env['EMMAKEN_JUST_CONFIGURE'] = '1'
333  if EM_BUILD_VERBOSE >= 2:
334    stdout = None
335  if EM_BUILD_VERBOSE >= 1:
336    stderr = None
337  print('configure: ' + ' '.join(args), file=sys.stderr)
338  run_process(args, stdout=stdout, stderr=stderr, env=env, **kwargs)
339
340
341def make(args, stdout=None, stderr=None, env=None, cflags=[], **kwargs):
342  if env is None:
343    env = get_building_env(cflags=cflags)
344
345  # On Windows prefer building with mingw32-make instead of make, if it exists.
346  if WINDOWS:
347    if args[0] == 'make':
348      mingw32_make = which('mingw32-make')
349      if mingw32_make:
350        args[0] = mingw32_make
351
352    if 'mingw32-make' in args[0]:
353      env = remove_sh_exe_from_path(env)
354
355  # On Windows, run the execution through shell to get PATH expansion and
356  # executable extension lookup, e.g. 'sdl2-config' will match with
357  # 'sdl2-config.bat' in PATH.
358  if EM_BUILD_VERBOSE >= 2:
359    stdout = None
360  if EM_BUILD_VERBOSE >= 1:
361    stderr = None
362  print('make: ' + ' '.join(args), file=sys.stderr)
363  run_process(args, stdout=stdout, stderr=stderr, env=env, shell=WINDOWS, **kwargs)
364
365
366def make_paths_absolute(f):
367  if f.startswith('-'):  # skip flags
368    return f
369  else:
370    return os.path.abspath(f)
371
372
373# Runs llvm-nm in parallel for the given list of files.
374# The results are populated in uninternal_nm_cache
375# multiprocessing_pool: An existing multiprocessing pool to reuse for the operation, or None
376# to have the function allocate its own.
377def parallel_llvm_nm(files):
378  with ToolchainProfiler.profile_block('parallel_llvm_nm'):
379    pool = get_multiprocessing_pool()
380    object_contents = pool.map(g_llvm_nm_uncached, files)
381
382    for i, file in enumerate(files):
383      if object_contents[i].returncode != 0:
384        logger.debug('llvm-nm failed on file ' + file + ': return code ' + str(object_contents[i].returncode) + ', error: ' + object_contents[i].output)
385      uninternal_nm_cache[file] = object_contents[i]
386    return object_contents
387
388
389def read_link_inputs(files):
390  with ToolchainProfiler.profile_block('read_link_inputs'):
391    # Before performing the link, we need to look at each input file to determine which symbols
392    # each of them provides. Do this in multiple parallel processes.
393    archive_names = [] # .a files passed in to the command line to the link
394    object_names = [] # .o/.bc files passed in to the command line to the link
395    for f in files:
396      absolute_path_f = make_paths_absolute(f)
397
398      if absolute_path_f not in ar_contents and is_ar(absolute_path_f):
399        archive_names.append(absolute_path_f)
400      elif absolute_path_f not in uninternal_nm_cache and is_bitcode(absolute_path_f):
401        object_names.append(absolute_path_f)
402
403    # Archives contain objects, so process all archives first in parallel to obtain the object files in them.
404    pool = get_multiprocessing_pool()
405    object_names_in_archives = pool.map(extract_archive_contents, archive_names)
406
407    def clean_temporary_archive_contents_directory(directory):
408      def clean_at_exit():
409        try_delete(directory)
410      if directory:
411        atexit.register(clean_at_exit)
412
413    for n in range(len(archive_names)):
414      if object_names_in_archives[n]['returncode'] != 0:
415        raise Exception('llvm-ar failed on archive ' + archive_names[n] + '!')
416      ar_contents[archive_names[n]] = object_names_in_archives[n]['files']
417      clean_temporary_archive_contents_directory(object_names_in_archives[n]['dir'])
418
419    for o in object_names_in_archives:
420      for f in o['files']:
421        if f not in uninternal_nm_cache:
422          object_names.append(f)
423
424    # Next, extract symbols from all object files (either standalone or inside archives we just extracted)
425    # The results are not used here directly, but populated to llvm-nm cache structure.
426    parallel_llvm_nm(object_names)
427
428
429def llvm_backend_args():
430  # disable slow and relatively unimportant optimization passes
431  args = ['-combiner-global-alias-analysis=false']
432
433  # asm.js-style exception handling
434  if Settings.DISABLE_EXCEPTION_CATCHING != 1:
435    args += ['-enable-emscripten-cxx-exceptions']
436  if Settings.DISABLE_EXCEPTION_CATCHING == 2:
437    allowed = ','.join(Settings.EXCEPTION_CATCHING_ALLOWED or ['__fake'])
438    args += ['-emscripten-cxx-exceptions-allowed=' + allowed]
439
440  # asm.js-style setjmp/longjmp handling
441  args += ['-enable-emscripten-sjlj']
442
443  # better (smaller, sometimes faster) codegen, see binaryen#1054
444  # and https://bugs.llvm.org/show_bug.cgi?id=39488
445  args += ['-disable-lsr']
446
447  return args
448
449
450def link_to_object(linker_inputs, target):
451  # link using lld for the wasm backend with wasm object files,
452  # other otherwise for linking of bitcode we must use our python
453  # code (necessary for asm.js, for wasm bitcode see
454  # https://bugs.llvm.org/show_bug.cgi?id=40654)
455  if not Settings.LTO:
456    link_lld(linker_inputs + ['--relocatable'], target)
457  else:
458    link(linker_inputs, target)
459
460
461def link_llvm(linker_inputs, target):
462  # runs llvm-link to link things.
463  cmd = [LLVM_LINK] + linker_inputs + ['-o', target]
464  cmd = get_command_with_possible_response_file(cmd)
465  print_compiler_stage(cmd)
466  output = run_process(cmd, stdout=PIPE).stdout
467  assert os.path.exists(target) and (output is None or 'Could not open input file' not in output), 'Linking error: ' + output
468  return target
469
470
471def lld_flags_for_executable(external_symbol_list):
472  cmd = []
473  if external_symbol_list:
474    undefs = configuration.get_temp_files().get('.undefined').name
475    with open(undefs, 'w') as f:
476      f.write('\n'.join(external_symbol_list))
477    cmd.append('--allow-undefined-file=%s' % undefs)
478  else:
479    cmd.append('--allow-undefined')
480
481  # wasi does not import the memory (but for JS it is efficient to do so,
482  # as it allows us to set up memory, preload files, etc. even before the
483  # wasm module arrives)
484  if not Settings.STANDALONE_WASM:
485    cmd.append('--import-memory')
486    cmd.append('--import-table')
487  else:
488    cmd.append('--export-table')
489
490  if Settings.USE_PTHREADS:
491    cmd.append('--shared-memory')
492
493  # wasm-ld can strip debug info for us. this strips both the Names
494  # section and DWARF, so we can only use it when we don't need any of
495  # those things.
496  if Settings.DEBUG_LEVEL < 2 and (not Settings.EMIT_SYMBOL_MAP and
497                                   not Settings.PROFILING_FUNCS and
498                                   not Settings.ASYNCIFY):
499    cmd.append('--strip-debug')
500
501  if Settings.RELOCATABLE:
502    if Settings.MAIN_MODULE == 2 or Settings.SIDE_MODULE == 2:
503      cmd.append('--no-export-dynamic')
504    else:
505      cmd.append('--no-gc-sections')
506      cmd.append('--export-dynamic')
507
508  if Settings.LINKABLE:
509    cmd.append('--export-all')
510  else:
511    c_exports = [e for e in Settings.EXPORTED_FUNCTIONS if is_c_symbol(e)]
512    # Strip the leading underscores
513    c_exports = [demangle_c_symbol_name(e) for e in c_exports]
514    if external_symbol_list:
515      # Filter out symbols external/JS symbols
516      c_exports = [e for e in c_exports if e not in external_symbol_list]
517    for export in c_exports:
518      cmd += ['--export', export]
519
520  if Settings.RELOCATABLE:
521    cmd.append('--experimental-pic')
522    if Settings.SIDE_MODULE:
523      cmd.append('-shared')
524    else:
525      cmd.append('-pie')
526
527  if not Settings.SIDE_MODULE:
528    cmd += [
529      '-z', 'stack-size=%s' % Settings.TOTAL_STACK,
530      '--initial-memory=%d' % Settings.INITIAL_MEMORY,
531    ]
532
533    if Settings.STANDALONE_WASM:
534      # when Settings.EXPECT_MAIN is set we fall back to wasm-ld default of _start
535      if not Settings.EXPECT_MAIN:
536        cmd += ['--entry=_initialize']
537    else:
538      if Settings.EXPECT_MAIN and not Settings.IGNORE_MISSING_MAIN:
539        cmd += ['--entry=main']
540      else:
541        cmd += ['--no-entry']
542    if not Settings.ALLOW_MEMORY_GROWTH:
543      cmd.append('--max-memory=%d' % Settings.INITIAL_MEMORY)
544    elif Settings.MAXIMUM_MEMORY != -1:
545      cmd.append('--max-memory=%d' % Settings.MAXIMUM_MEMORY)
546    if not Settings.RELOCATABLE:
547      cmd.append('--global-base=%s' % Settings.GLOBAL_BASE)
548
549  return cmd
550
551
552def link_lld(args, target, external_symbol_list=None):
553  if not os.path.exists(WASM_LD):
554    exit_with_error('linker binary not found in LLVM directory: %s', WASM_LD)
555  # runs lld to link things.
556  # lld doesn't currently support --start-group/--end-group since the
557  # semantics are more like the windows linker where there is no need for
558  # grouping.
559  args = [a for a in args if a not in ('--start-group', '--end-group')]
560
561  # Emscripten currently expects linkable output (SIDE_MODULE/MAIN_MODULE) to
562  # include all archive contents.
563  if Settings.LINKABLE:
564    args.insert(0, '--whole-archive')
565    args.append('--no-whole-archive')
566
567  if Settings.STRICT:
568    args.append('--fatal-warnings')
569
570  cmd = [WASM_LD, '-o', target] + args
571  for a in llvm_backend_args():
572    cmd += ['-mllvm', a]
573
574  # For relocatable output (generating an object file) we don't pass any of the
575  # normal linker flags that are used when building and exectuable
576  if '--relocatable' not in args and '-r' not in args:
577    cmd += lld_flags_for_executable(external_symbol_list)
578
579  print_compiler_stage(cmd)
580  cmd = get_command_with_possible_response_file(cmd)
581  check_call(cmd)
582  return target
583
584
585def link(files, target, force_archive_contents=False, just_calculate=False):
586  # "Full-featured" linking: looks into archives (duplicates lld functionality)
587  actual_files = []
588  # Tracking unresolveds is necessary for .a linking, see below.
589  # Specify all possible entry points to seed the linking process.
590  # For a simple application, this would just be "main".
591  unresolved_symbols = set([func[1:] for func in Settings.EXPORTED_FUNCTIONS])
592  resolved_symbols = set()
593  # Paths of already included object files from archives.
594  added_contents = set()
595  has_ar = False
596  for f in files:
597    if not f.startswith('-'):
598      has_ar = has_ar or is_ar(make_paths_absolute(f))
599
600  # If we have only one archive or the force_archive_contents flag is set,
601  # then we will add every object file we see, regardless of whether it
602  # resolves any undefined symbols.
603  force_add_all = len(files) == 1 or force_archive_contents
604
605  # Considers an object file for inclusion in the link. The object is included
606  # if force_add=True or if the object provides a currently undefined symbol.
607  # If the object is included, the symbol tables are updated and the function
608  # returns True.
609  def consider_object(f, force_add=False):
610    new_symbols = llvm_nm(f)
611    # Check if the object was valid according to llvm-nm. It also accepts
612    # native object files.
613    if not new_symbols.is_valid_for_nm():
614      diagnostics.warning('emcc', 'object %s is not valid according to llvm-nm, cannot link', f)
615      return False
616    # Check the object is valid for us, and not a native object file.
617    if not is_bitcode(f):
618      exit_with_error('unknown file type: %s', f)
619    provided = new_symbols.defs.union(new_symbols.commons)
620    do_add = force_add or not unresolved_symbols.isdisjoint(provided)
621    if do_add:
622      logger.debug('adding object %s to link (forced: %d)' % (f, force_add))
623      # Update resolved_symbols table with newly resolved symbols
624      resolved_symbols.update(provided)
625      # Update unresolved_symbols table by adding newly unresolved symbols and
626      # removing newly resolved symbols.
627      unresolved_symbols.update(new_symbols.undefs.difference(resolved_symbols))
628      unresolved_symbols.difference_update(provided)
629      actual_files.append(f)
630    return do_add
631
632  # Traverse a single archive. The object files are repeatedly scanned for
633  # newly satisfied symbols until no new symbols are found. Returns true if
634  # any object files were added to the link.
635  def consider_archive(f, force_add):
636    added_any_objects = False
637    loop_again = True
638    logger.debug('considering archive %s' % (f))
639    contents = ar_contents[f]
640    while loop_again: # repeatedly traverse until we have everything we need
641      loop_again = False
642      for content in contents:
643        if content in added_contents:
644          continue
645        # Link in the .o if it provides symbols, *or* this is a singleton archive (which is
646        # apparently an exception in gcc ld)
647        if consider_object(content, force_add=force_add):
648          added_contents.add(content)
649          loop_again = True
650          added_any_objects = True
651    logger.debug('done running loop of archive %s' % (f))
652    return added_any_objects
653
654  read_link_inputs([x for x in files if not x.startswith('-')])
655
656  # Rescan a group of archives until we don't find any more objects to link.
657  def scan_archive_group(group):
658    loop_again = True
659    logger.debug('starting archive group loop')
660    while loop_again:
661      loop_again = False
662      for archive in group:
663        if consider_archive(archive, force_add=False):
664          loop_again = True
665    logger.debug('done with archive group loop')
666
667  current_archive_group = None
668  in_whole_archive = False
669  for f in files:
670    absolute_path_f = make_paths_absolute(f)
671    if f.startswith('-'):
672      if f in ['--start-group', '-(']:
673        assert current_archive_group is None, 'Nested --start-group, missing --end-group?'
674        current_archive_group = []
675      elif f in ['--end-group', '-)']:
676        assert current_archive_group is not None, '--end-group without --start-group'
677        scan_archive_group(current_archive_group)
678        current_archive_group = None
679      elif f in ['--whole-archive', '-whole-archive']:
680        in_whole_archive = True
681      elif f in ['--no-whole-archive', '-no-whole-archive']:
682        in_whole_archive = False
683      else:
684        # Command line flags should already be vetted by the time this method
685        # is called, so this is an internal error
686        assert False, 'unsupported link flag: ' + f
687    elif is_ar(absolute_path_f):
688      # Extract object files from ar archives, and link according to gnu ld semantics
689      # (link in an entire .o from the archive if it supplies symbols still unresolved)
690      consider_archive(absolute_path_f, in_whole_archive or force_add_all)
691      # If we're inside a --start-group/--end-group section, add to the list
692      # so we can loop back around later.
693      if current_archive_group is not None:
694        current_archive_group.append(absolute_path_f)
695    elif is_bitcode(absolute_path_f):
696      if has_ar:
697        consider_object(f, force_add=True)
698      else:
699        # If there are no archives then we can simply link all valid object
700        # files and skip the symbol table stuff.
701        actual_files.append(f)
702    else:
703      exit_with_error('unknown file type: %s', f)
704
705  # We have to consider the possibility that --start-group was used without a matching
706  # --end-group; GNU ld permits this behavior and implicitly treats the end of the
707  # command line as having an --end-group.
708  if current_archive_group:
709    logger.debug('--start-group without matching --end-group, rescanning')
710    scan_archive_group(current_archive_group)
711    current_archive_group = None
712
713  try_delete(target)
714
715  # Finish link
716  # tolerate people trying to link a.so a.so etc.
717  actual_files = unique_ordered(actual_files)
718  if just_calculate:
719    # just calculating; return the link arguments which is the final list of files to link
720    return actual_files
721
722  logger.debug('emcc: linking: %s to %s', actual_files, target)
723  link_llvm(actual_files, target)
724  return target
725
726
727def get_command_with_possible_response_file(cmd):
728  # 8k is a bit of an arbitrary limit, but a reasonable one
729  # for max command line size before we use a response file
730  if len(' '.join(cmd)) <= 8192:
731    return cmd
732
733  logger.debug('using response file for %s' % cmd[0])
734  filename = response_file.create_response_file(cmd[1:], TEMP_DIR)
735  new_cmd = [cmd[0], "@" + filename]
736  return new_cmd
737
738
739# LLVM optimizations
740# @param opt A list of LLVM optimization parameters
741def llvm_opt(filename, opts, out=None):
742  inputs = filename
743  if not isinstance(inputs, list):
744    inputs = [inputs]
745  else:
746    assert out, 'must provide out if llvm_opt on a list of inputs'
747  assert len(opts), 'should not call opt with nothing to do'
748  opts = opts[:]
749
750  target = out or (filename + '.opt.bc')
751  cmd = [LLVM_OPT] + inputs + opts + ['-o', target]
752  cmd = get_command_with_possible_response_file(cmd)
753  print_compiler_stage(cmd)
754  check_call(cmd)
755  assert os.path.exists(target), 'llvm optimizer emitted no output.'
756  if not out:
757    shutil.move(filename + '.opt.bc', filename)
758  return target
759
760
761def llvm_dis(input_filename, output_filename):
762  # LLVM binary ==> LLVM assembly
763  try_delete(output_filename)
764  output = run_process([LLVM_DIS, input_filename, '-o', output_filename], stdout=PIPE).stdout
765  assert os.path.exists(output_filename), 'Could not create .ll file: ' + output
766
767
768def llvm_as(input_filename, output_filename):
769  # LLVM assembly ==> LLVM binary
770  try_delete(output_filename)
771  output = run_process([LLVM_AS, input_filename, '-o', output_filename], stdout=PIPE).stdout
772  assert os.path.exists(output_filename), 'Could not create bc file: ' + output
773
774
775def parse_symbols(output, include_internal=False):
776  defs = []
777  undefs = []
778  commons = []
779  for line in output.split('\n'):
780    if not line or line[0] == '#':
781      continue
782    # e.g.  filename.o:  , saying which file it's from
783    if ':' in line:
784      continue
785    parts = [seg for seg in line.split(' ') if len(seg)]
786    # pnacl-nm will print zero offsets for bitcode, and newer llvm-nm will print present symbols
787    # as  -------- T name
788    if len(parts) == 3 and parts[0] == "--------" or re.match(r'^[\da-f]{8}$', parts[0]):
789      parts.pop(0)
790    if len(parts) == 2:
791      # ignore lines with absolute offsets, these are not bitcode anyhow
792      # e.g. |00000630 t d_source_name|
793      status, symbol = parts
794      if status == 'U':
795        undefs.append(symbol)
796      elif status == 'C':
797        commons.append(symbol)
798      elif (not include_internal and status == status.upper()) or \
799           (include_internal and status in ['W', 't', 'T', 'd', 'D']):
800        # FIXME: using WTD in the previous line fails due to llvm-nm behavior on macOS,
801        #        so for now we assume all uppercase are normally defined external symbols
802        defs.append(symbol)
803  return ObjectFileInfo(0, None, set(defs), set(undefs), set(commons))
804
805
806def llvm_nm_uncached(filename, stdout=PIPE, stderr=PIPE, include_internal=False):
807  # LLVM binary ==> list of symbols
808  proc = run_process([LLVM_NM, filename], stdout=stdout, stderr=stderr, check=False)
809  if proc.returncode == 0:
810    return parse_symbols(proc.stdout, include_internal)
811  else:
812    return ObjectFileInfo(proc.returncode, str(proc.stdout) + str(proc.stderr))
813
814
815def llvm_nm(filename, stdout=PIPE, stderr=PIPE, include_internal=False):
816  # Always use absolute paths to maximize cache usage
817  filename = os.path.abspath(filename)
818
819  if include_internal and filename in internal_nm_cache:
820    return internal_nm_cache[filename]
821  elif not include_internal and filename in uninternal_nm_cache:
822    return uninternal_nm_cache[filename]
823
824  ret = llvm_nm_uncached(filename, stdout, stderr, include_internal)
825
826  if ret.returncode != 0:
827    logger.debug('llvm-nm failed on file ' + filename + ': return code ' + str(ret.returncode) + ', error: ' + ret.output)
828
829  # Even if we fail, write the results to the NM cache so that we don't keep trying to llvm-nm the failing file again later.
830  if include_internal:
831    internal_nm_cache[filename] = ret
832  else:
833    uninternal_nm_cache[filename] = ret
834
835  return ret
836
837
838def emcc(filename, args=[], output_filename=None, stdout=None, stderr=None, env=None):
839  if output_filename is None:
840    output_filename = filename + '.o'
841  try_delete(output_filename)
842  run_process([EMCC, filename] + args + ['-o', output_filename], stdout=stdout, stderr=stderr, env=env)
843
844
845def emar(action, output_filename, filenames, stdout=None, stderr=None, env=None):
846  try_delete(output_filename)
847  response_filename = response_file.create_response_file(filenames, TEMP_DIR)
848  cmd = [EMAR, action, output_filename] + ['@' + response_filename]
849  try:
850    run_process(cmd, stdout=stdout, stderr=stderr, env=env)
851  finally:
852    try_delete(response_filename)
853
854  if 'c' in action:
855    assert os.path.exists(output_filename), 'emar could not create output file: ' + output_filename
856
857
858def can_inline():
859  return Settings.INLINING_LIMIT == 0
860
861
862def get_safe_internalize():
863  if Settings.LINKABLE:
864    return [] # do not internalize anything
865
866  exps = Settings.EXPORTED_FUNCTIONS
867  internalize_public_api = '-internalize-public-api-'
868  internalize_list = ','.join([demangle_c_symbol_name(exp) for exp in exps])
869
870  # EXPORTED_FUNCTIONS can potentially be very large.
871  # 8k is a bit of an arbitrary limit, but a reasonable one
872  # for max command line size before we use a response file
873  if len(internalize_list) > 8192:
874    logger.debug('using response file for EXPORTED_FUNCTIONS in internalize')
875    finalized_exports = '\n'.join([exp[1:] for exp in exps])
876    internalize_list_file = configuration.get_temp_files().get('.response').name
877    with open(internalize_list_file, 'w') as f:
878      f.write(finalized_exports)
879    internalize_public_api += 'file=' + internalize_list_file
880  else:
881    internalize_public_api += 'list=' + internalize_list
882
883  # internalize carefully, llvm 3.2 will remove even main if not told not to
884  return ['-internalize', internalize_public_api]
885
886
887def opt_level_to_str(opt_level, shrink_level=0):
888  # convert opt_level/shrink_level pair to a string argument like -O1
889  if opt_level == 0:
890    return '-O0'
891  if shrink_level == 1:
892    return '-Os'
893  elif shrink_level >= 2:
894    return '-Oz'
895  else:
896    return '-O' + str(min(opt_level, 3))
897
898
899def js_optimizer(filename, passes):
900  from . import js_optimizer
901  try:
902    return js_optimizer.run(filename, passes)
903  except subprocess.CalledProcessError as e:
904    exit_with_error("'%s' failed (%d)", ' '.join(e.cmd), e.returncode)
905
906
907# run JS optimizer on some JS, ignoring asm.js contents if any - just run on it all
908def acorn_optimizer(filename, passes, extra_info=None, return_output=False):
909  optimizer = path_from_root('tools', 'acorn-optimizer.js')
910  original_filename = filename
911  if extra_info is not None:
912    temp_files = configuration.get_temp_files()
913    temp = temp_files.get('.js').name
914    shutil.copyfile(filename, temp)
915    with open(temp, 'a') as f:
916      f.write('// EXTRA_INFO: ' + extra_info)
917    filename = temp
918  cmd = NODE_JS + [optimizer, filename] + passes
919  # Keep JS code comments intact through the acorn optimization pass so that JSDoc comments
920  # will be carried over to a later Closure run.
921  if Settings.USE_CLOSURE_COMPILER:
922    cmd += ['--closureFriendly']
923  if not return_output:
924    next = original_filename + '.jso.js'
925    configuration.get_temp_files().note(next)
926    check_call(cmd, stdout=open(next, 'w'))
927    save_intermediate(next, '%s.js' % passes[0])
928    return next
929  output = check_call(cmd, stdout=PIPE).stdout
930  return output
931
932
933# evals ctors. if binaryen_bin is provided, it is the dir of the binaryen tool
934# for this, and we are in wasm mode
935def eval_ctors(js_file, binary_file, binaryen_bin='', debug_info=False):
936  logger.debug('Ctor evalling in the wasm backend is disabled due to https://github.com/emscripten-core/emscripten/issues/9527')
937  return
938  # TODO re-enable
939  # cmd = [PYTHON, path_from_root('tools', 'ctor_evaller.py'), js_file, binary_file, str(Settings.INITIAL_MEMORY), str(Settings.TOTAL_STACK), str(Settings.GLOBAL_BASE), binaryen_bin, str(int(debug_info))]
940  # if binaryen_bin:
941  #   cmd += get_binaryen_feature_flags()
942  # print_compiler_stage(cmd)
943  # check_call(cmd)
944
945
946def get_closure_compiler():
947  # First check if the user configured a specific CLOSURE_COMPILER in thier settings
948  if shared.CLOSURE_COMPILER:
949    return shared.CLOSURE_COMPILER
950
951  # Otherwise use the one installed vai npm
952  cmd = shared.get_npm_cmd('google-closure-compiler')
953  if not WINDOWS:
954    # Work around an issue that Closure compiler can take up a lot of memory and crash in an error
955    # "FATAL ERROR: Ineffective mark-compacts near heap limit Allocation failed - JavaScript heap
956    # out of memory"
957    cmd.insert(-1, '--max_old_space_size=8192')
958  return cmd
959
960
961def check_closure_compiler(cmd, args, env):
962  try:
963    output = run_process(cmd + args + ['--version'], stdout=PIPE, env=env).stdout
964  except Exception as e:
965    logger.warn(str(e))
966    exit_with_error('closure compiler ("%s --version") did not execute properly!' % str(cmd))
967
968  if 'Version:' not in output:
969    exit_with_error('unrecognized closure compiler --version output (%s):\n%s' % (str(cmd), output))
970
971
972def closure_compiler(filename, pretty=True, advanced=True, extra_closure_args=None):
973  with ToolchainProfiler.profile_block('closure_compiler'):
974    env = shared.env_with_node_in_path()
975    user_args = []
976    env_args = os.environ.get('EMCC_CLOSURE_ARGS')
977    if env_args:
978      user_args += shlex.split(env_args)
979    if extra_closure_args:
980      user_args += extra_closure_args
981
982    # Closure compiler expects JAVA_HOME to be set *and* java.exe to be in the PATH in order
983    # to enable use the java backend.  Without this it will only try the native and JavaScript
984    # versions of the compiler.
985    java_bin = os.path.dirname(JAVA)
986    if java_bin:
987      def add_to_path(dirname):
988        env['PATH'] = env['PATH'] + os.pathsep + dirname
989      add_to_path(java_bin)
990      java_home = os.path.dirname(java_bin)
991      env.setdefault('JAVA_HOME', java_home)
992
993    if WINDOWS and not any(a.startswith('--platform') for a in user_args):
994      # Disable native compiler on windows until upstream issue is fixed:
995      # https://github.com/google/closure-compiler-npm/issues/147
996      user_args.append('--platform=java')
997
998    closure_cmd = get_closure_compiler()
999    check_closure_compiler(closure_cmd, user_args, env)
1000
1001    # Closure externs file contains known symbols to be extern to the minification, Closure
1002    # should not minify these symbol names.
1003    CLOSURE_EXTERNS = [path_from_root('src', 'closure-externs', 'closure-externs.js')]
1004
1005    # Closure compiler needs to know about all exports that come from the asm.js/wasm module, because to optimize for small code size,
1006    # the exported symbols are added to global scope via a foreach loop in a way that evades Closure's static analysis. With an explicit
1007    # externs file for the exports, Closure is able to reason about the exports.
1008    if Settings.MODULE_EXPORTS and not Settings.DECLARE_ASM_MODULE_EXPORTS:
1009      # Generate an exports file that records all the exported symbols from asm.js/wasm module.
1010      module_exports_suppressions = '\n'.join(['/**\n * @suppress {duplicate, undefinedVars}\n */\nvar %s;\n' % i for i, j in Settings.MODULE_EXPORTS])
1011      exports_file = configuration.get_temp_files().get('_module_exports.js')
1012      exports_file.write(module_exports_suppressions.encode())
1013      exports_file.close()
1014
1015      CLOSURE_EXTERNS += [exports_file.name]
1016
1017    # Node.js specific externs
1018    if Settings.target_environment_may_be('node'):
1019      NODE_EXTERNS_BASE = path_from_root('third_party', 'closure-compiler', 'node-externs')
1020      NODE_EXTERNS = os.listdir(NODE_EXTERNS_BASE)
1021      NODE_EXTERNS = [os.path.join(NODE_EXTERNS_BASE, name) for name in NODE_EXTERNS
1022                      if name.endswith('.js')]
1023      CLOSURE_EXTERNS += [path_from_root('src', 'closure-externs', 'node-externs.js')] + NODE_EXTERNS
1024
1025    # V8/SpiderMonkey shell specific externs
1026    if Settings.target_environment_may_be('shell'):
1027      V8_EXTERNS = [path_from_root('src', 'closure-externs', 'v8-externs.js')]
1028      SPIDERMONKEY_EXTERNS = [path_from_root('src', 'closure-externs', 'spidermonkey-externs.js')]
1029      CLOSURE_EXTERNS += V8_EXTERNS + SPIDERMONKEY_EXTERNS
1030
1031    # Web environment specific externs
1032    if Settings.target_environment_may_be('web') or Settings.target_environment_may_be('worker'):
1033      BROWSER_EXTERNS_BASE = path_from_root('src', 'closure-externs', 'browser-externs')
1034      if os.path.isdir(BROWSER_EXTERNS_BASE):
1035        BROWSER_EXTERNS = os.listdir(BROWSER_EXTERNS_BASE)
1036        BROWSER_EXTERNS = [os.path.join(BROWSER_EXTERNS_BASE, name) for name in BROWSER_EXTERNS
1037                           if name.endswith('.js')]
1038        CLOSURE_EXTERNS += BROWSER_EXTERNS
1039
1040    if Settings.MINIMAL_RUNTIME and Settings.USE_PTHREADS and not Settings.MODULARIZE:
1041      CLOSURE_EXTERNS += [path_from_root('src', 'minimal_runtime_worker_externs.js')]
1042    outfile = filename + '.cc.js'
1043    configuration.get_temp_files().note(outfile)
1044
1045    args = ['--compilation_level', 'ADVANCED_OPTIMIZATIONS' if advanced else 'SIMPLE_OPTIMIZATIONS']
1046    # Keep in sync with ecmaVersion in tools/acorn-optimizer.js
1047    args += ['--language_in', 'ECMASCRIPT_2018']
1048    # Tell closure not to do any transpiling or inject any polyfills.
1049    # At some point we may want to look into using this as way to convert to ES5 but
1050    # babel is perhaps a better tool for that.
1051    args += ['--language_out', 'NO_TRANSPILE']
1052    # Tell closure never to inject the 'use strict' directive.
1053    args += ['--emit_use_strict=false']
1054
1055    for e in CLOSURE_EXTERNS:
1056      args += ['--externs', e]
1057    args += ['--js_output_file', outfile]
1058
1059    if Settings.IGNORE_CLOSURE_COMPILER_ERRORS:
1060      args.append('--jscomp_off=*')
1061    if pretty:
1062      args += ['--formatting', 'PRETTY_PRINT']
1063    args += ['--js', filename]
1064    cmd = closure_cmd + args + user_args
1065    logger.debug('closure compiler: ' + ' '.join(cmd))
1066
1067    proc = run_process(cmd, stderr=PIPE, check=False, env=env)
1068
1069    # XXX Closure bug: if Closure is invoked with --create_source_map, Closure should create a
1070    # outfile.map source map file (https://github.com/google/closure-compiler/wiki/Source-Maps)
1071    # But it looks like it creates such files on Linux(?) even without setting that command line
1072    # flag (and currently we don't), so delete the produced source map file to not leak files in
1073    # temp directory.
1074    try_delete(outfile + '.map')
1075
1076    # Print Closure diagnostics result up front.
1077    if proc.returncode != 0:
1078      logger.error('Closure compiler run failed:\n')
1079    elif len(proc.stderr.strip()) > 0:
1080      if Settings.CLOSURE_WARNINGS == 'error':
1081        logger.error('Closure compiler completed with warnings and -s CLOSURE_WARNINGS=error enabled, aborting!\n')
1082      elif Settings.CLOSURE_WARNINGS == 'warn':
1083        logger.warn('Closure compiler completed with warnings:\n')
1084
1085    # Print input file (long wall of text!)
1086    if DEBUG == 2 and (proc.returncode != 0 or (len(proc.stderr.strip()) > 0 and Settings.CLOSURE_WARNINGS != 'quiet')):
1087      input_file = open(filename, 'r').read().splitlines()
1088      for i in range(len(input_file)):
1089        sys.stderr.write(str(i + 1) + ': ' + input_file[i] + '\n')
1090
1091    if proc.returncode != 0:
1092      logger.error(proc.stderr) # print list of errors (possibly long wall of text if input was minified)
1093
1094      # Exit and print final hint to get clearer output
1095      msg = 'closure compiler failed (rc: %d): %s' % (proc.returncode, shared.shlex_join(cmd))
1096      if not pretty:
1097        msg += ' the error message may be clearer with -g1 and EMCC_DEBUG=2 set'
1098      exit_with_error(msg)
1099
1100    if len(proc.stderr.strip()) > 0 and Settings.CLOSURE_WARNINGS != 'quiet':
1101      # print list of warnings (possibly long wall of text if input was minified)
1102      if Settings.CLOSURE_WARNINGS == 'error':
1103        logger.error(proc.stderr)
1104      else:
1105        logger.warn(proc.stderr)
1106
1107      # Exit and/or print final hint to get clearer output
1108      if not pretty:
1109        logger.warn('(rerun with -g1 linker flag for an unminified output)')
1110      elif DEBUG != 2:
1111        logger.warn('(rerun with EMCC_DEBUG=2 enabled to dump Closure input file)')
1112
1113      if Settings.CLOSURE_WARNINGS == 'error':
1114        exit_with_error('closure compiler produced warnings and -s CLOSURE_WARNINGS=error enabled')
1115
1116    return outfile
1117
1118
1119# minify the final wasm+JS combination. this is done after all the JS
1120# and wasm optimizations; here we do the very final optimizations on them
1121def minify_wasm_js(js_file, wasm_file, expensive_optimizations, minify_whitespace, debug_info):
1122  # start with JSDCE, to clean up obvious JS garbage. When optimizing for size,
1123  # use AJSDCE (aggressive JS DCE, performs multiple iterations). Clean up
1124  # whitespace if necessary too.
1125  passes = []
1126  if not Settings.LINKABLE:
1127    passes.append('JSDCE' if not expensive_optimizations else 'AJSDCE')
1128  if minify_whitespace:
1129    passes.append('minifyWhitespace')
1130  if passes:
1131    logger.debug('running cleanup on shell code: ' + ' '.join(passes))
1132    js_file = acorn_optimizer(js_file, passes)
1133  # if we can optimize this js+wasm combination under the assumption no one else
1134  # will see the internals, do so
1135  if not Settings.LINKABLE:
1136    # if we are optimizing for size, shrink the combined wasm+JS
1137    # TODO: support this when a symbol map is used
1138    if expensive_optimizations:
1139      js_file = metadce(js_file, wasm_file, minify_whitespace=minify_whitespace, debug_info=debug_info)
1140      # now that we removed unneeded communication between js and wasm, we can clean up
1141      # the js some more.
1142      passes = ['AJSDCE']
1143      if minify_whitespace:
1144        passes.append('minifyWhitespace')
1145      logger.debug('running post-meta-DCE cleanup on shell code: ' + ' '.join(passes))
1146      js_file = acorn_optimizer(js_file, passes)
1147      if Settings.MINIFY_WASM_IMPORTS_AND_EXPORTS:
1148        js_file = minify_wasm_imports_and_exports(js_file, wasm_file, minify_whitespace=minify_whitespace, minify_exports=Settings.MINIFY_ASMJS_EXPORT_NAMES, debug_info=debug_info)
1149  return js_file
1150
1151
1152# run binaryen's wasm-metadce to dce both js and wasm
1153def metadce(js_file, wasm_file, minify_whitespace, debug_info):
1154  logger.debug('running meta-DCE')
1155  temp_files = configuration.get_temp_files()
1156  # first, get the JS part of the graph
1157  extra_info = '{ "exports": [' + ','.join(map(lambda x: '["' + x[0] + '","' + x[1] + '"]', Settings.MODULE_EXPORTS)) + ']}'
1158  txt = acorn_optimizer(js_file, ['emitDCEGraph', 'noPrint'], return_output=True, extra_info=extra_info)
1159  graph = json.loads(txt)
1160  # add exports based on the backend output, that are not present in the JS
1161  if not Settings.DECLARE_ASM_MODULE_EXPORTS:
1162    exports = set()
1163    for item in graph:
1164      if 'export' in item:
1165        exports.add(item['export'])
1166    for export, unminified in Settings.MODULE_EXPORTS:
1167      if export not in exports:
1168        graph.append({
1169          'export': export,
1170          'name': 'emcc$export$' + export,
1171          'reaches': []
1172        })
1173  # ensure that functions expected to be exported to the outside are roots
1174  for item in graph:
1175    if 'export' in item:
1176      export = item['export']
1177      # wasm backend's exports are prefixed differently inside the wasm
1178      export = asmjs_mangle(export)
1179      if export in user_requested_exports or Settings.EXPORT_ALL:
1180        item['root'] = True
1181  # in standalone wasm, always export the memory
1182  if Settings.STANDALONE_WASM:
1183    graph.append({
1184      'export': 'memory',
1185      'name': 'emcc$export$memory',
1186      'reaches': [],
1187      'root': True
1188    })
1189    graph.append({
1190      'export': '__indirect_function_table',
1191      'name': 'emcc$export$__indirect_function_table',
1192      'reaches': [],
1193      'root': True
1194    })
1195  # fix wasi imports TODO: support wasm stable with an option?
1196  WASI_IMPORTS = set([
1197    'environ_get',
1198    'environ_sizes_get',
1199    'args_get',
1200    'args_sizes_get',
1201    'fd_write',
1202    'fd_close',
1203    'fd_read',
1204    'fd_seek',
1205    'fd_fdstat_get',
1206    'fd_sync',
1207    'proc_exit',
1208    'clock_res_get',
1209    'clock_time_get',
1210  ])
1211  for item in graph:
1212    if 'import' in item and item['import'][1][1:] in WASI_IMPORTS:
1213      item['import'][0] = Settings.WASI_MODULE_NAME
1214  # fixup wasm backend prefixing
1215  for item in graph:
1216    if 'import' in item:
1217      if item['import'][1][0] == '_':
1218        item['import'][1] = item['import'][1][1:]
1219  # map import names from wasm to JS, using the actual name the wasm uses for the import
1220  import_name_map = {}
1221  for item in graph:
1222    if 'import' in item:
1223      import_name_map[item['name']] = 'emcc$import$' + item['import'][1]
1224  temp = temp_files.get('.txt').name
1225  txt = json.dumps(graph)
1226  with open(temp, 'w') as f:
1227    f.write(txt)
1228  # run wasm-metadce
1229  out = run_binaryen_command('wasm-metadce',
1230                             wasm_file,
1231                             wasm_file,
1232                             ['--graph-file=' + temp],
1233                             debug=debug_info,
1234                             stdout=PIPE)
1235  # find the unused things in js
1236  unused = []
1237  PREFIX = 'unused: '
1238  for line in out.splitlines():
1239    if line.startswith(PREFIX):
1240      name = line.replace(PREFIX, '').strip()
1241      if name in import_name_map:
1242        name = import_name_map[name]
1243      unused.append(name)
1244  # remove them
1245  passes = ['applyDCEGraphRemovals']
1246  if minify_whitespace:
1247    passes.append('minifyWhitespace')
1248  extra_info = {'unused': unused}
1249  return acorn_optimizer(js_file, passes, extra_info=json.dumps(extra_info))
1250
1251
1252def asyncify_lazy_load_code(wasm_binary_target, debug):
1253  # create the lazy-loaded wasm. remove the memory segments from it, as memory
1254  # segments have already been applied by the initial wasm, and apply the knowledge
1255  # that it will only rewind, after which optimizations can remove some code
1256  args = ['--remove-memory', '--mod-asyncify-never-unwind']
1257  if Settings.OPT_LEVEL > 0:
1258    args.append(opt_level_to_str(Settings.OPT_LEVEL, Settings.SHRINK_LEVEL))
1259  run_wasm_opt(wasm_binary_target,
1260               wasm_binary_target + '.lazy.wasm',
1261               args=args,
1262               debug=debug)
1263  # re-optimize the original, by applying the knowledge that imports will
1264  # definitely unwind, and we never rewind, after which optimizations can remove
1265  # a lot of code
1266  # TODO: support other asyncify stuff, imports that don't always unwind?
1267  # TODO: source maps etc.
1268  args = ['--mod-asyncify-always-and-only-unwind']
1269  if Settings.OPT_LEVEL > 0:
1270    args.append(opt_level_to_str(Settings.OPT_LEVEL, Settings.SHRINK_LEVEL))
1271  run_wasm_opt(infile=wasm_binary_target,
1272               outfile=wasm_binary_target,
1273               args=args,
1274               debug=debug)
1275
1276
1277def minify_wasm_imports_and_exports(js_file, wasm_file, minify_whitespace, minify_exports, debug_info):
1278  logger.debug('minifying wasm imports and exports')
1279  # run the pass
1280  if minify_exports:
1281    # standalone wasm mode means we need to emit a wasi import module.
1282    # otherwise, minify even the imported module names.
1283    if Settings.MINIFY_WASM_IMPORTED_MODULES:
1284      pass_name = '--minify-imports-and-exports-and-modules'
1285    else:
1286      pass_name = '--minify-imports-and-exports'
1287  else:
1288    pass_name = '--minify-imports'
1289  out = run_wasm_opt(wasm_file, wasm_file,
1290                     [pass_name],
1291                     debug=debug_info,
1292                     stdout=PIPE)
1293  # TODO this is the last tool we run, after normal opts and metadce. it
1294  # might make sense to run Stack IR optimizations here or even -O (as
1295  # metadce which runs before us might open up new general optimization
1296  # opportunities). however, the benefit is less than 0.5%.
1297
1298  # get the mapping
1299  SEP = ' => '
1300  mapping = {}
1301  for line in out.split('\n'):
1302    if SEP in line:
1303      old, new = line.strip().split(SEP)
1304      assert old not in mapping, 'imports must be unique'
1305      mapping[old] = new
1306  # apply them
1307  passes = ['applyImportAndExportNameChanges']
1308  if minify_whitespace:
1309    passes.append('minifyWhitespace')
1310  extra_info = {'mapping': mapping}
1311  return acorn_optimizer(js_file, passes, extra_info=json.dumps(extra_info))
1312
1313
1314def wasm2js(js_file, wasm_file, opt_level, minify_whitespace, use_closure_compiler, debug_info, symbols_file=None):
1315  logger.debug('wasm2js')
1316  args = ['--emscripten']
1317  if opt_level > 0:
1318    args += ['-O']
1319  if symbols_file:
1320    args += ['--symbols-file=%s' % symbols_file]
1321  wasm2js_js = run_binaryen_command('wasm2js', wasm_file,
1322                                    args=args,
1323                                    debug=debug_info,
1324                                    stdout=PIPE)
1325  if DEBUG:
1326    with open(os.path.join(get_emscripten_temp_dir(), 'wasm2js-output.js'), 'w') as f:
1327      f.write(wasm2js_js)
1328  # JS optimizations
1329  if opt_level >= 2:
1330    passes = []
1331    # it may be useful to also run: simplifyIfs, registerize, asmLastOpts
1332    # passes += ['simplifyExpressions'] # XXX fails on wasm3js.test_sqlite
1333    # TODO: enable name minification with pthreads. atm wasm2js emits pthread
1334    # helper functions outside of the asmFunc(), and they mix up minifyGlobals
1335    # (which assumes any vars in that area are global, like var HEAP8, but
1336    # those helpers have internal vars in a scope it doesn't understand yet)
1337    if not debug_info and not Settings.USE_PTHREADS:
1338      passes += ['minifyNames']
1339    if minify_whitespace:
1340      passes += ['minifyWhitespace']
1341    passes += ['last']
1342    if passes:
1343      # hackish fixups to work around wasm2js style and the js optimizer FIXME
1344      wasm2js_js = '// EMSCRIPTEN_START_ASM\n' + wasm2js_js + '// EMSCRIPTEN_END_ASM\n'
1345      wasm2js_js = wasm2js_js.replace('// EMSCRIPTEN_START_FUNCS;\n', '// EMSCRIPTEN_START_FUNCS\n')
1346      wasm2js_js = wasm2js_js.replace('// EMSCRIPTEN_END_FUNCS;\n', '// EMSCRIPTEN_END_FUNCS\n')
1347      wasm2js_js = wasm2js_js.replace('\n function $', '\nfunction $')
1348      wasm2js_js = wasm2js_js.replace('\n }', '\n}')
1349      wasm2js_js += '\n// EMSCRIPTEN_GENERATED_FUNCTIONS\n'
1350      temp = configuration.get_temp_files().get('.js').name
1351      with open(temp, 'w') as f:
1352        f.write(wasm2js_js)
1353      temp = js_optimizer(temp, passes)
1354      with open(temp) as f:
1355        wasm2js_js = f.read()
1356  # Closure compiler: in mode 1, we just minify the shell. In mode 2, we
1357  # minify the wasm2js output as well, which is ok since it isn't
1358  # validating asm.js.
1359  # TODO: in the non-closure case, we could run a lightweight general-
1360  #       purpose JS minifier here.
1361  if use_closure_compiler == 2:
1362    temp = configuration.get_temp_files().get('.js').name
1363    with open(temp, 'a') as f:
1364      f.write(wasm2js_js)
1365    temp = closure_compiler(temp, pretty=not minify_whitespace, advanced=False)
1366    with open(temp) as f:
1367      wasm2js_js = f.read()
1368    # closure may leave a trailing `;`, which would be invalid given where we place
1369    # this code (inside parens)
1370    wasm2js_js = wasm2js_js.strip()
1371    if wasm2js_js[-1] == ';':
1372      wasm2js_js = wasm2js_js[:-1]
1373  with open(js_file) as f:
1374    all_js = f.read()
1375  # quoted notation, something like Module['__wasm2jsInstantiate__']
1376  finds = re.findall(r'''[\w\d_$]+\[['"]__wasm2jsInstantiate__['"]\]''', all_js)
1377  if not finds:
1378    # post-closure notation, something like a.__wasm2jsInstantiate__
1379    finds = re.findall(r'''[\w\d_$]+\.__wasm2jsInstantiate__''', all_js)
1380  assert len(finds) == 1
1381  marker = finds[0]
1382  all_js = all_js.replace(marker, '(\n' + wasm2js_js + '\n)')
1383  # replace the placeholder with the actual code
1384  js_file = js_file + '.wasm2js.js'
1385  with open(js_file, 'w') as f:
1386    f.write(all_js)
1387  return js_file
1388
1389
1390def strip(infile, outfile, debug=False, producers=False):
1391  cmd = [LLVM_OBJCOPY, infile, outfile]
1392  if debug:
1393    cmd += ['--remove-section=.debug*']
1394  if producers:
1395    cmd += ['--remove-section=producers']
1396  run_process(cmd)
1397
1398
1399# extract the DWARF info from the main file, and leave the wasm with
1400# debug into as a file on the side
1401# TODO: emit only debug sections in the side file, and not the entire
1402#       wasm as well
1403def emit_debug_on_side(wasm_file, wasm_file_with_dwarf):
1404  # if the dwarf filename wasn't provided, use the default target + a suffix
1405  wasm_file_with_dwarf = shared.Settings.SEPARATE_DWARF
1406  if wasm_file_with_dwarf is True:
1407    wasm_file_with_dwarf = wasm_file + '.debug.wasm'
1408  embedded_path = shared.Settings.SEPARATE_DWARF_URL or wasm_file_with_dwarf
1409
1410  shutil.move(wasm_file, wasm_file_with_dwarf)
1411  strip(wasm_file_with_dwarf, wasm_file, debug=True)
1412
1413  # embed a section in the main wasm to point to the file with external DWARF,
1414  # see https://yurydelendik.github.io/webassembly-dwarf/#external-DWARF
1415  section_name = b'\x13external_debug_info' # section name, including prefixed size
1416  filename_bytes = asbytes(embedded_path)
1417  contents = WebAssembly.toLEB(len(filename_bytes)) + filename_bytes
1418  section_size = len(section_name) + len(contents)
1419  with open(wasm_file, 'ab') as f:
1420    f.write(b'\0') # user section is code 0
1421    f.write(WebAssembly.toLEB(section_size))
1422    f.write(section_name)
1423    f.write(contents)
1424
1425
1426def apply_wasm_memory_growth(js_file):
1427  logger.debug('supporting wasm memory growth with pthreads')
1428  fixed = acorn_optimizer(js_file, ['growableHeap'])
1429  ret = js_file + '.pgrow.js'
1430  with open(fixed, 'r') as fixed_f:
1431    with open(ret, 'w') as ret_f:
1432      with open(path_from_root('src', 'growableHeap.js')) as support_code_f:
1433        ret_f.write(support_code_f.read() + '\n' + fixed_f.read())
1434  return ret
1435
1436
1437def use_unsigned_pointers_in_js(js_file):
1438  logger.debug('using unsigned pointers in JS')
1439  return acorn_optimizer(js_file, ['unsignPointers'])
1440
1441
1442def instrument_js_for_asan(js_file):
1443  logger.debug('instrumenting JS memory accesses for ASan')
1444  return acorn_optimizer(js_file, ['asanify'])
1445
1446
1447def handle_final_wasm_symbols(wasm_file, symbols_file, debug_info):
1448  logger.debug('handle_final_wasm_symbols')
1449  args = []
1450  if symbols_file:
1451    args += ['--print-function-map']
1452  if not debug_info:
1453    # to remove debug info, we just write to that same file, and without -g
1454    args += ['-o', wasm_file]
1455  # ignore stderr because if wasm-opt is run without a -o it will warn
1456  output = run_wasm_opt(wasm_file, args=args, stdout=PIPE)
1457  if symbols_file:
1458    with open(symbols_file, 'w') as f:
1459      f.write(output)
1460
1461
1462def is_ar(filename):
1463  try:
1464    if _is_ar_cache.get(filename):
1465      return _is_ar_cache[filename]
1466    header = open(filename, 'rb').read(8)
1467    sigcheck = header == b'!<arch>\n'
1468    _is_ar_cache[filename] = sigcheck
1469    return sigcheck
1470  except Exception as e:
1471    logger.debug('is_ar failed to test whether file \'%s\' is a llvm archive file! Failed on exception: %s' % (filename, e))
1472    return False
1473
1474
1475def is_bitcode(filename):
1476  try:
1477    # look for magic signature
1478    b = open(filename, 'rb').read(4)
1479    if b[:2] == b'BC':
1480      return True
1481    # on macOS, there is a 20-byte prefix which starts with little endian
1482    # encoding of 0x0B17C0DE
1483    elif b == b'\xDE\xC0\x17\x0B':
1484      b = bytearray(open(filename, 'rb').read(22))
1485      return b[20:] == b'BC'
1486  except IndexError:
1487    # not enough characters in the input
1488    # note that logging will be done on the caller function
1489    pass
1490  return False
1491
1492
1493def is_wasm(filename):
1494  magic = open(filename, 'rb').read(4)
1495  return magic == b'\0asm'
1496
1497
1498# Given the name of a special Emscripten-implemented system library, returns an
1499# array of absolute paths to JS library files inside emscripten/src/ that
1500# corresponds to the library name.
1501def path_to_system_js_libraries(library_name):
1502  # Some native libraries are implemented in Emscripten as system side JS libraries
1503  js_system_libraries = {
1504    'c': '',
1505    'dl': '',
1506    'EGL': 'library_egl.js',
1507    'GL': ['library_webgl.js', 'library_html5_webgl.js'],
1508    'webgl.js': ['library_webgl.js', 'library_html5_webgl.js'],
1509    'GLESv2': 'library_webgl.js',
1510    # N.b. there is no GLESv3 to link to (note [f] in https://www.khronos.org/registry/implementers_guide.html)
1511    'GLEW': 'library_glew.js',
1512    'glfw': 'library_glfw.js',
1513    'glfw3': 'library_glfw.js',
1514    'GLU': '',
1515    'glut': 'library_glut.js',
1516    'm': '',
1517    'openal': 'library_openal.js',
1518    'rt': '',
1519    'pthread': '',
1520    'X11': 'library_xlib.js',
1521    'SDL': 'library_sdl.js',
1522    'stdc++': '',
1523    'uuid': 'library_uuid.js',
1524    'websocket': 'library_websocket.js'
1525  }
1526  library_files = []
1527  if library_name in js_system_libraries:
1528    if len(js_system_libraries[library_name]):
1529      lib = js_system_libraries[library_name] if isinstance(js_system_libraries[library_name], list) else [js_system_libraries[library_name]]
1530      library_files += lib
1531      logger.debug('Linking in JS library ' + str(lib))
1532
1533  elif library_name.endswith('.js') and os.path.isfile(path_from_root('src', 'library_' + library_name)):
1534    library_files += ['library_' + library_name]
1535
1536  return library_files
1537
1538
1539def emit_wasm_source_map(wasm_file, map_file):
1540  # source file paths must be relative to the location of the map (which is
1541  # emitted alongside the wasm)
1542  base_path = os.path.dirname(os.path.abspath(Settings.WASM_BINARY_FILE))
1543  sourcemap_cmd = [PYTHON, path_from_root('tools', 'wasm-sourcemap.py'),
1544                   wasm_file,
1545                   '--dwarfdump=' + LLVM_DWARFDUMP,
1546                   '-o',  map_file,
1547                   '--basepath=' + base_path]
1548  check_call(sourcemap_cmd)
1549
1550
1551def get_binaryen_feature_flags():
1552  # start with the MVP features, add the rest as needed
1553  ret = ['--mvp-features']
1554  if Settings.USE_PTHREADS:
1555    ret += ['--enable-threads']
1556  ret += Settings.BINARYEN_FEATURES
1557  return ret
1558
1559
1560def check_binaryen(bindir):
1561  opt = os.path.join(bindir, exe_suffix('wasm-opt'))
1562  if not os.path.exists(opt):
1563    exit_with_error('binaryen executable not found (%s). Please check your binaryen installation' % opt)
1564  try:
1565    output = run_process([opt, '--version'], stdout=PIPE).stdout
1566  except subprocess.CalledProcessError:
1567    exit_with_error('error running binaryen executable (%s). Please check your binaryen installation' % opt)
1568  if output:
1569    output = output.splitlines()[0]
1570  try:
1571    version = output.split()[2]
1572    version = int(version)
1573  except (IndexError, ValueError):
1574    exit_with_error('error parsing binaryen version (%s). Please check your binaryen installation (%s)' % (output, opt))
1575
1576  # Allow the expected version or the following one in order avoid needing to update both
1577  # emscripten and binaryen in lock step in emscripten-releases.
1578  if version not in (EXPECTED_BINARYEN_VERSION, EXPECTED_BINARYEN_VERSION + 1):
1579    diagnostics.warning('version-check', 'unexpected binaryen version: %s (expected %s)', version, EXPECTED_BINARYEN_VERSION)
1580
1581
1582def get_binaryen_bin():
1583  assert Settings.WASM, 'non wasm builds should not ask for binaryen'
1584  global binaryen_checked
1585  rtn = os.path.join(BINARYEN_ROOT, 'bin')
1586  if not binaryen_checked:
1587    check_binaryen(rtn)
1588    binaryen_checked = True
1589  return rtn
1590
1591
1592def run_binaryen_command(tool, infile, outfile=None, args=[], debug=False, stdout=None):
1593  cmd = [os.path.join(get_binaryen_bin(), tool)]
1594  if outfile and tool == 'wasm-opt' and Settings.DEBUG_LEVEL != 3:
1595    # remove any dwarf debug info sections, if the debug level is <3, as
1596    # we don't need them; also remove them if we the level is 4, as then we
1597    # want a source map, which is implemented separately from dwarf.
1598    # note that we add this pass first, so that it doesn't interfere with
1599    # the final set of passes (which may generate stack IR, and nothing
1600    # should be run after that)
1601    # TODO: if lld can strip dwarf then we don't need this. atm though it can
1602    #       only strip all debug info or none, which includes the name section
1603    #       which we may need
1604    # TODO: once fastcomp is gone, either remove source maps entirely, or
1605    #       support them by emitting a source map at the end from the dwarf,
1606    #       and use llvm-objcopy to remove that final dwarf
1607    cmd += ['--strip-dwarf']
1608  cmd += args
1609  if infile:
1610    cmd += [infile]
1611  if outfile:
1612    cmd += ['-o', outfile]
1613  if debug:
1614    cmd += ['-g'] # preserve the debug info
1615  # if the features are not already handled, handle them
1616  if '--detect-features' not in cmd:
1617    cmd += get_binaryen_feature_flags()
1618  print_compiler_stage(cmd)
1619  # if we are emitting a source map, every time we load and save the wasm
1620  # we must tell binaryen to update it
1621  emit_source_map = Settings.DEBUG_LEVEL == 4 and outfile
1622  if emit_source_map:
1623    cmd += ['--input-source-map=' + infile + '.map']
1624    cmd += ['--output-source-map=' + outfile + '.map']
1625  ret = check_call(cmd, stdout=stdout).stdout
1626  if outfile:
1627    save_intermediate(outfile, '%s.wasm' % tool)
1628  return ret
1629
1630
1631def run_wasm_opt(*args, **kwargs):
1632  return run_binaryen_command('wasm-opt', *args, **kwargs)
1633
1634
1635save_intermediate_counter = 0
1636
1637
1638def save_intermediate(src, dst):
1639  if DEBUG:
1640    global save_intermediate_counter
1641    dst = 'emcc-%d-%s' % (save_intermediate_counter, dst)
1642    save_intermediate_counter += 1
1643    dst = os.path.join(CANONICAL_TEMP_DIR, dst)
1644    logger.debug('saving debug copy %s' % dst)
1645    shutil.copyfile(src, dst)
1646