2# Copyright 2018 The Emscripten Authors.  All rights reserved.
3# Emscripten is available under two separate licenses, the MIT license and the
4# University of Illinois/NCSA Open Source License.  Both these licenses can be
5# found in the LICENSE file.
7"""Utility tools that extracts DWARF information encoded in a wasm output
8produced by the LLVM tools, and encodes it as a wasm source map. Additionally,
9it can collect original sources, change files prefixes, and strip debug
10sections from a wasm file.
13import argparse
14from collections import OrderedDict
15import json
16import logging
17from math import floor, log
18import os
19import re
20from subprocess import Popen, PIPE
21import sys
# Put the parent of this script's directory on the module search path
# (presumably so that the `tools` package imported below can be found when
# this file is run directly -- confirm against the repository layout).
sys.path.insert(1, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from tools.shared import asstr

# Module-level logger used by the functions in this file.
logger = logging.getLogger('wasm-sourcemap')
def parse_args():
  """Parse the command line of wasm-sourcemap.py.

  Returns:
    The argparse namespace with the attributes `wasm`, `output`, `prefix`,
    `sources`, `load_prefix`, `w`, `strip`, `source_map_url`, `dwarfdump`,
    `dwarfdump_output` and `basepath`.
  """
  parser = argparse.ArgumentParser(prog='wasm-sourcemap.py', description=__doc__)
  parser.add_argument('wasm', help='wasm file')
  parser.add_argument('-o', '--output', help='output source map')
  parser.add_argument('-p', '--prefix', nargs='*', help='replace source debug filename prefix for source map', default=[])
  parser.add_argument('-s', '--sources', action='store_true', help='read and embed source files from file system into source map')
  parser.add_argument('-l', '--load-prefix', nargs='*', help='replace source debug filename prefix for reading sources from file system (see also --sources)', default=[])
  parser.add_argument('-w', nargs='?', help='set output wasm file')
  parser.add_argument('-x', '--strip', action='store_true', help='removes debug and linking sections')
  parser.add_argument('-u', '--source-map-url', nargs='?', help='specifies sourceMappingURL section content')
  parser.add_argument('--dwarfdump', help="path to llvm-dwarfdump executable")
  # Undocumented: read a pre-generated llvm-dwarfdump dump from a file.
  parser.add_argument('--dwarfdump-output', nargs='?', help=argparse.SUPPRESS)
  parser.add_argument('--basepath', help='base path for source files, which will be relative to this')
  return parser.parse_args()
class Prefixes:
  """Rewrites file name prefixes according to a list of rules.

  Each rule is either `prefix=replacement` (the prefix is swapped for the
  replacement) or a bare `prefix` (the prefix is removed). The first rule
  whose prefix matches wins.
  """

  def __init__(self, args):
    """Parse the rule strings.

    Args:
      args: iterable of rule strings as described on the class.
    """
    prefixes = []
    for p in args:
      # Split on the first '=' only, so a replacement may itself contain '='.
      prefix, separator, replacement = p.partition('=')
      prefixes.append({'prefix': prefix, 'replacement': replacement if separator else None})
    self.prefixes = prefixes
    # Memoizes resolve() results by input name.
    self.cache = {}

  def resolve(self, name):
    """Return `name` with the first matching prefix rule applied.

    A name that matches no rule is returned unchanged.
    """
    if name in self.cache:
      return self.cache[name]

    # Default for names no rule applies to; without it the lookup below
    # would fall through with `result` unassigned.
    result = name
    for p in self.prefixes:
      if name.startswith(p['prefix']):
        result = (p['replacement'] or '') + name[len(p['prefix']):]
        break
    self.cache[name] = result
    return result
# SourceMapPrefixes bundles two file name resolvers:
#  - "sources" resolves the names that are written to the source map JSON
#  - "load" resolves the paths that are used to load source text
class SourceMapPrefixes:
  """Holds the two prefix resolvers used while building a source map."""

  def __init__(self, sources, load):
    # Resolver applied to names emitted into the source map.
    self.sources = sources
    # Resolver applied to paths when source text is read.
    self.load = load

  def provided(self):
    """Return True when at least one of the resolvers has prefix rules."""
    if self.sources.prefixes:
      return True
    return bool(self.load.prefixes)
def encode_vlq(n):
  """Encode the signed integer `n` as a base64 VLQ string.

  The sign is stored in the lowest bit and the magnitude above it; the value
  is then emitted five bits at a time, least significant group first, with
  bit 32 of a digit marking that more digits follow.
  """
  alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
  if n < 0:
    value = ((-n) << 1) + 1
  else:
    value = n << 1
  digits = []
  while value > 31:
    digits.append(alphabet[32 + (value & 31)])
    value >>= 5
  digits.append(alphabet[value])
  return "".join(digits)
def read_var_uint(wasm, pos):
  """Decode a variable-length unsigned integer from `wasm` starting at `pos`.

  Each byte contributes its low seven bits, least significant group first;
  a byte below 128 ends the number.

  Returns:
    A `(value, next_pos)` tuple, where `next_pos` is the index just past the
    last byte consumed.
  """
  value = 0
  shift = 0
  while True:
    # Slicing (rather than indexing) yields a one-element sequence that ord()
    # accepts for both byte strings and text strings.
    byte = ord(wasm[pos:pos + 1])
    pos += 1
    if byte < 128:
      return value + (byte << shift), pos
    value |= (byte - 128) << shift
    shift += 7
def strip_debug_sections(wasm):
  """Return the module bytes without the debug related custom sections.

  The first 8 bytes (the module header) are copied through unchanged. After
  that every section is copied except custom sections (id 0) named
  "linking" or "sourceMappingURL", or whose name starts with ".debug_" or
  "reloc..debug_".

  Args:
    wasm: the module contents as a byte string.

  Returns:
    The remaining module contents as a byte string.
  """
  logger.debug('Strip debug sections')
  pos = 8
  # Collect the kept slices and join once instead of re-copying the whole
  # result for every section.
  kept = [wasm[:pos]]

  while pos < len(wasm):
    section_start = pos
    section_id, pos_ = read_var_uint(wasm, pos)
    section_size, section_body = read_var_uint(wasm, pos_)
    pos = section_body + section_size
    if section_id == 0:
      name_len, name_pos = read_var_uint(wasm, section_body)
      name = wasm[name_pos:name_pos + name_len]
      # `name` is a slice of the binary, so it has to be compared against
      # byte literals: on Python 3 a bytes/str comparison is never equal and
      # bytes.startswith() rejects str arguments.
      if name in (b"linking", b"sourceMappingURL") or name.startswith((b"reloc..debug_", b".debug_")):
        continue  # skip debug related sections
    kept.append(wasm[section_start:pos])

  return b"".join(kept)
def encode_uint_var(n):
  """Encode the unsigned integer `n` as a variable-length byte string.

  Seven bits are emitted per byte, least significant group first; every byte
  except the last has its high bit set.
  """
  encoded = bytearray()
  while True:
    if n <= 127:
      # Final group: written without the continuation bit.
      encoded.append(n)
      return bytes(encoded)
    encoded.append((n & 127) | 128)
    n >>= 7
def append_source_mapping(wasm, url):
  """Return `wasm` with a "sourceMappingURL" custom section appended.

  The section body is the length-prefixed section name followed by the
  length-prefixed URL; the section itself is written as id 0 followed by the
  body length and the body.

  Args:
    wasm: the module contents as a byte string.
    url: the source map URL, as text or as an already encoded byte string.

  Returns:
    The extended module contents as a byte string.
  """
  logger.debug('Append sourceMappingURL section')
  section_name = b"sourceMappingURL"
  # Text cannot be concatenated with the binary module on Python 3, and the
  # length prefix must count encoded bytes rather than characters.
  if not isinstance(url, bytes):
    url = url.encode('utf-8')
  section_content = encode_uint_var(len(section_name)) + section_name + encode_uint_var(len(url)) + url
  return wasm + encode_uint_var(0) + encode_uint_var(len(section_content)) + section_content
def get_code_section_offset(wasm):
  """Return the offset of the body of the section with id 10.

  Sections are walked starting right after the first 8 bytes of `wasm`.
  Returns None when no section with id 10 is found.
  """
  logger.debug('Read sections index')
  cursor = 8

  while cursor < len(wasm):
    section_id, size_pos = read_var_uint(wasm, cursor)
    section_size, body_pos = read_var_uint(wasm, size_pos)
    if section_id == 10:
      return body_pos
    # Not the section we are after: jump over its body.
    cursor = body_pos + section_size
  return None
def remove_dead_entries(entries):
  """Drop, in place, the line table blocks that belong to dead functions.

  A block is a run of entries ending with one whose 'eos' flag is set. As a
  heuristic, a block is treated as dead when its starting address is close
  to 0, that is, smaller than one byte plus the length of the encoded
  function size field.
  """
  first = 0  # index of the first entry of the block being scanned
  index = 0
  while index < len(entries):
    last = entries[index]
    if last['eos']:
      start_address = entries[first]['address']
      # Length of the LEB encoded function size (including the size field).
      size_field_length = floor(log(last['address'] - start_address + 1, 128)) + 1
      # The extra byte is for the code section entries.
      if start_address < 1 + size_field_length:
        # Dead block: delete it and rescan from the same position.
        del entries[first:index + 1]
        index = first
      else:
        index += 1
        first = index
    else:
      index += 1
def read_dwarf_entries(wasm, options):
  """Collect the line table rows found in llvm-dwarfdump output.

  The dump is read from the file named by `options.dwarfdump_output` when
  that is set; otherwise `options.dwarfdump` is run on `wasm`. If neither is
  set, the tool is missing, or it exits with a non-zero status, an error is
  logged and the process exits with status 1.

  Args:
    wasm: path of the wasm file passed to llvm-dwarfdump.
    options: parsed command line; `dwarfdump_output` and `dwarfdump` are read.

  Returns:
    A list of dicts with the keys 'address', 'line', 'column', 'file' and
    'eos', with dead function blocks removed, sorted by 'address'.
  """
  if options.dwarfdump_output:
    # Use a context manager so the dump file is closed right away.
    with open(options.dwarfdump_output, 'r') as infile:
      output = infile.read()
  elif options.dwarfdump:
    logger.debug('Reading DWARF information from %s' % wasm)
    if not os.path.exists(options.dwarfdump):
      logger.error('llvm-dwarfdump not found: ' + options.dwarfdump)
      sys.exit(1)
    process = Popen([options.dwarfdump, '-debug-info', '-debug-line', '--recurse-depth=0', wasm], stdout=PIPE)
    output, _ = process.communicate()
    exit_code = process.returncode
    if exit_code != 0:
      logger.error('Error during llvm-dwarfdump execution (%s)' % exit_code)
      sys.exit(1)
  else:
    logger.error('Please specify either --dwarfdump or --dwarfdump-output')
    sys.exit(1)

  entries = []
  # The split keeps the captured offset, so the result is
  # [text before, offset 1, chunk 1, offset 2, chunk 2, ...].
  debug_line_chunks = re.split(r"debug_line\[(0x[0-9a-f]*)\]", asstr(output))
  maybe_debug_info_content = debug_line_chunks[0]
  for stmt_list, line_chunk in zip(debug_line_chunks[1::2], debug_line_chunks[2::2]):
    comp_dir_match = re.search(r"DW_AT_stmt_list\s+\(" + stmt_list + r"\)\s+" +
                               r"DW_AT_comp_dir\s+\(\"([^\"]+)", maybe_debug_info_content)
    comp_dir = comp_dir_match.group(1) if comp_dir_match is not None else ""

    # include_directories[  1] = "/Users/yury/Work/junk/sqlite-playground/src"
    # file_names[  1]:
    #            name: "playground.c"
    #       dir_index: 1
    #        mod_time: 0x00000000
    #          length: 0x00000000
    #
    # Address            Line   Column File   ISA Discriminator Flags
    # ------------------ ------ ------ ------ --- ------------- -------------
    # 0x0000000000000006     22      0      1   0             0  is_stmt
    # 0x0000000000000007     23     10      1   0             0  is_stmt prologue_end
    # 0x000000000000000f     23      3      1   0             0
    # 0x0000000000000010     23      3      1   0             0  end_sequence
    # 0x0000000000000011     28      0      1   0             0  is_stmt

    # Directory index 0 stands for the compilation directory.
    include_directories = {'0': comp_dir}
    for dir_match in re.finditer(r"include_directories\[\s*(\d+)\] = \"([^\"]*)", line_chunk):
      include_directories[dir_match.group(1)] = dir_match.group(2)

    files = {}
    for file_match in re.finditer(r"file_names\[\s*(\d+)\]:\s+name: \"([^\"]*)\"\s+dir_index: (\d+)", line_chunk):
      directory = include_directories[file_match.group(3)]
      file_name = file_match.group(2)
      # startswith() also copes with an empty name, which indexing [0] would not.
      if file_name.startswith('/'):
        file_path = file_name
      else:
        file_path = directory + '/' + file_name
      files[file_match.group(1)] = file_path

    for line in re.finditer(r"\n0x([0-9a-f]+)\s+(\d+)\s+(\d+)\s+(\d+)(.*?end_sequence)?", line_chunk):
      entry = {'address': int(line.group(1), 16), 'line': int(line.group(2)), 'column': int(line.group(3)), 'file': files[line.group(4)], 'eos': line.group(5) is not None}
      if not entry['eos']:
        entries.append(entry)
      else:
        # move end of function to the last END operator
        entry['address'] -= 1
        # `entries` may still be empty when the very first row is an
        # end_sequence, so check before looking at the last element.
        if entries and entries[-1]['address'] == entry['address']:
          # last entry has the same address, reusing
          entries[-1]['eos'] = True
        else:
          entries.append(entry)

  remove_dead_entries(entries)

  # return entries sorted by the address field
  return sorted(entries, key=lambda entry: entry['address'])
def normalize_path(path):
  """Turn backslashes into forward slashes, then replace each '//' pair with '/'.

  The pair replacement is a single pass, so a run of three or more slashes
  is shortened but not fully collapsed.
  """
  forward_slashed = path.replace('\\', '/')
  return forward_slashed.replace('//', '/')
def build_sourcemap(entries, code_section_offset, prefixes, collect_sources, base_path):
  """Build the source map structure for the given line table entries.

  Args:
    entries: iterable of dicts with the keys 'line', 'column', 'address' and
      'file'; entries whose line is 0 are skipped.
    code_section_offset: value added to every entry address.
    prefixes: object offering `provided()`, `sources.resolve(name)` and
      `load.resolve(name)`.
    collect_sources: when true, the text of every source file is read and
      stored in 'sourcesContent' (None for files that cannot be read).
    base_path: start directory for the relative source names that are
      emitted when no prefixes were provided.

  Returns:
    An OrderedDict with the keys 'version', 'names', 'sources',
    'sourcesContent' and 'mappings'.
  """
  source_names = []
  source_texts = [] if collect_sources else None
  segments = []
  source_ids = {}
  # Every field of a segment is stored as a delta against the previous one.
  prev_address = 0
  prev_source_id = 0
  prev_line = 1
  prev_column = 1
  for entry in entries:
    line = entry['line']
    column = entry['column']
    if line == 0:
      # entries with line 0 are ignored
      continue
    # columns start at least at 1
    column = column or 1
    address = entry['address'] + code_section_offset
    file_name = normalize_path(entry['file'])
    if prefixes.provided():
      # explicit prefixes take precedence
      source_name = prefixes.sources.resolve(file_name)
    else:
      # otherwise emit a path relative to base_path
      try:
        file_name = os.path.relpath(file_name, base_path)
      except ValueError:
        file_name = os.path.abspath(file_name)
      file_name = normalize_path(file_name)
      source_name = file_name

    source_id = source_ids.get(source_name)
    if source_id is None:
      # First time this source is seen: register it.
      source_id = len(source_names)
      source_ids[source_name] = source_id
      source_names.append(source_name)
      if collect_sources:
        load_name = prefixes.load.resolve(file_name)
        try:
          with open(load_name, 'r') as infile:
            text = infile.read()
        except IOError:
          print('Failed to read source: %s' % load_name)
          text = None
        source_texts.append(text)

    deltas = (address - prev_address,
              source_id - prev_source_id,
              line - prev_line,
              column - prev_column)
    segments.append(''.join(encode_vlq(delta) for delta in deltas))
    prev_address, prev_source_id, prev_line, prev_column = address, source_id, line, column

  result = OrderedDict()
  result['version'] = 3
  result['names'] = []
  result['sources'] = source_names
  result['sourcesContent'] = source_texts
  result['mappings'] = ','.join(segments)
  return result
def main():
  """Run the tool: write the source map and, on request, a new wasm file.

  Returns:
    0, which the caller uses as the process exit status.
  """
  options = parse_args()

  wasm_path = options.wasm
  with open(wasm_path, 'rb') as wasm_file:
    wasm = wasm_file.read()

  entries = read_dwarf_entries(wasm_path, options)
  code_section_offset = get_code_section_offset(wasm)
  prefixes = SourceMapPrefixes(sources=Prefixes(options.prefix),
                               load=Prefixes(options.load_prefix))

  logger.debug('Saving to %s' % options.output)
  source_map = build_sourcemap(entries, code_section_offset, prefixes,
                               options.sources, options.basepath)
  with open(options.output, 'w') as map_file:
    json.dump(source_map, map_file, separators=(',', ':'))

  # The wasm contents are only transformed and written back on request.
  if options.strip:
    wasm = strip_debug_sections(wasm)
  if options.source_map_url:
    wasm = append_source_mapping(wasm, options.source_map_url)
  if options.w:
    logger.debug('Saving wasm to %s' % options.w)
    with open(options.w, 'wb') as wasm_out:
      wasm_out.write(wasm)

  logger.debug('Done')
  return 0
if __name__ == '__main__':
  # Debug logging is enabled when EMCC_DEBUG is set to a non-empty value.
  log_level = logging.DEBUG if os.environ.get('EMCC_DEBUG') else logging.INFO
  logging.basicConfig(level=log_level)
  sys.exit(main())