1# Copyright 2014 The Chromium Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Helper script to close over all transitive dependencies of a given .nexe
6executable.
7
8e.g. Given
9A -> B
10B -> C
11B -> D
12C -> E
13
14where "A -> B" means A depends on B, then GetNeeded(A) will return A, B, C, D
15and E.
16"""
17
18import os
19import re
20import subprocess
21
22import elf
23
# Directory containing this script, and the directory two levels above it.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
SDK_DIR = os.path.dirname(os.path.dirname(SCRIPT_DIR))

# Patterns applied to single lines of objdump output; both expect the line to
# still carry its trailing newline.
#   NeededMatcher captures the library name of a "NEEDED <name>" line.
#   FormatMatcher captures the file name and the format name of a
#   "<file>: file format <format>" line.
NeededMatcher = re.compile('^ *NEEDED *([^ ]+)\n$')
FormatMatcher = re.compile('^(.+):\\s*file format (.+)\n$')

LOADER_X86 = 'runnable-ld.so'  # Name of the dynamic loader
LOADER_ARM = 'elf_loader_arm.nexe'  # Name of the ARM dynamic loader

# Maps the format name reported by objdump to the architecture name used
# throughout this file ('x86-32', 'x86-64' or 'arm').
OBJDUMP_ARCH_MAP = {
    # Names returned by Linux's objdump:
    'elf64-x86-64': 'x86-64',
    'elf32-i386': 'x86-32',
    'elf32-little': 'arm',
    'elf32-littlearm': 'arm',
    # Names returned by old x86_64-nacl-objdump:
    'elf64-nacl': 'x86-64',
    'elf32-nacl': 'x86-32',
    # Names returned by new x86_64-nacl-objdump:
    'elf64-x86-64-nacl': 'x86-64',
    'elf32-x86-64-nacl': 'x86-64',
    'elf32-i386-nacl': 'x86-32',
    'elf32-littlearm-nacl': 'arm',
}

# The proper name of the dynamic linker, as kept in the IRT.  This is
# excluded from the nmf file by convention.  Keyed by architecture name; the
# 'arm' entry is None, i.e. there is no such name to exclude for ARM.
LD_NACL_MAP = {
    'x86-32': 'ld-nacl-x86-32.so.1',
    'x86-64': 'ld-nacl-x86-64.so.1',
    'arm': None,
}
56
57
class Error(Exception):
  '''Base class for the errors raised by this module.'''
61
62
class NoObjdumpError(Error):
  '''Raised when objdump is required but no executable was supplied.'''
66
67
def GetNeeded(main_files, objdump, lib_path):
  '''Return every file needed to run main_files, with its architecture.

  As soon as elf.ParseElfHeader() reports one of main_files as dynamic (a
  truthy second element in its result), the whole set is resolved through
  objdump.  If none is dynamic, only main_files themselves are reported.

  Args:
    main_files: A list of files to find dependencies of.
    objdump: Path to the objdump executable.
    lib_path: A list of paths to search for shared libraries.

  Returns:
    A dict with key=filename and value=architecture.  On the dynamic path the
    architecture is one of ('x86-32', 'x86-64', 'arm'); on the static path it
    is the first element of the elf.ParseElfHeader() result.
  '''
  for candidate in main_files:
    is_dynamic = elf.ParseElfHeader(candidate)[1]
    if is_dynamic:
      return _GetNeededDynamic(main_files, objdump, lib_path)

  return _GetNeededStatic(main_files)
87
88
def _GetNeededDynamic(main_files, objdump, lib_path):
  '''Collect main_files plus the transitive closure of their NEEDED entries.

  Args:
    main_files: A list of files to find dependencies of.
    objdump: Path to the objdump executable.
    lib_path: A list of paths to search for shared libraries.

  Returns:
    A dict with key=filename and value=architecture ('x86-32', 'x86-64' or
    'arm') for main_files, the dynamic loader of each architecture and every
    library reached through NEEDED entries.  Files whose basename is one of
    the linker names in LD_NACL_MAP are left out.

  Raises:
    NoObjdumpError, Error: propagated from GleanFromObjdump().
  '''
  examined = set()
  all_files, unexamined = GleanFromObjdump(main_files, None, objdump, lib_path)

  # The loader is only added when the main files need anything at all.
  if unexamined:
    for arch in all_files.values():
      loader = LOADER_ARM if arch == 'arm' else LOADER_X86
      unexamined.add((loader, arch))

  while unexamined:
    files_to_examine = {}

    # Take all the currently unexamined files and group them
    # by architecture.
    for name, arch in unexamined:
      files_to_examine.setdefault(arch, []).append(name)

    # Call GleanFromObjdump() for each architecture.
    needed = set()
    for arch, files in files_to_examine.items():
      new_files, new_needed = GleanFromObjdump(files, arch, objdump, lib_path)
      all_files.update(new_files)
      needed |= new_needed

    examined |= unexamined
    unexamined = needed - examined

  # With the runnable-ld.so scheme we have today, the proper name of
  # the dynamic linker should be excluded from the list of files.
  ldso = set(LD_NACL_MAP[arch] for arch in set(OBJDUMP_ARCH_MAP.values()))
  # Iterate over a snapshot of the keys: entries are deleted inside the loop.
  for filename in list(all_files):
    if os.path.basename(filename) in ldso:
      del all_files[filename]

  return all_files
126
127
def GleanFromObjdump(files, arch, objdump, lib_path):
  '''Get architecture and dependency information for given files

  Each entry of |files| that does not exist as given is looked up in
  |lib_path|.  All resulting paths are passed to a single "objdump -p" run,
  whose output is scanned for "file format" and "NEEDED" lines.

  Args:
    files: A list of files to examine.
        [ '/path/to/my.nexe',
          '/path/to/lib64/libmy.so',
          '/path/to/mydata.so',
          '/path/to/my.data' ]
    arch: The architecture we are looking for, or None to accept any
          architecture.
    objdump: Path to the objdump executable.
    lib_path: A list of paths to search for shared libraries.

  Returns: A tuple with the following members:
    input_info: A dict with key=filename (as printed by objdump) and
        value=architecture. The architecture will be one of
        ('x86-32', 'x86-64', 'arm').
    needed: A set of (name, architecture) tuples, one per NEEDED entry.
        Example:
        set([('libc.so.1234', 'x86-32'), ('libgcc_s.so.1', 'x86-64')])
    Both are empty when |files| is empty.

  Raises:
    NoObjdumpError: if |objdump| is empty or None.
    Error: if objdump exits with a non-zero status, if a library cannot be
        found in |lib_path|, or if one of |files| is not reported by objdump
        with an accepted architecture.
    KeyError: if objdump reports a format missing from OBJDUMP_ARCH_MAP.
  '''
  if not objdump:
    raise NoObjdumpError('No objdump executable found!')

  # Nothing to examine: do not run objdump without any input file.
  if not files:
    return {}, set()

  full_paths = set()
  for filename in files:
    if os.path.exists(filename):
      full_paths.add(filename)
    else:
      full_paths.update(_FindLibsInPath(filename, lib_path))

  cmd = [objdump, '-p'] + list(full_paths)
  # The child gets an environment holding nothing but LANG.
  env = {'LANG': 'en_US.UTF-8'}
  # universal_newlines=True yields text (not bytes) on Python 3 as well, which
  # is what the str-based regular expressions below require.
  proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE, bufsize=-1,
                          universal_newlines=True,
                          env=env)
  output, err_output = proc.communicate()
  if proc.returncode:
    raise Error('%s\nStdError=%s\nobjdump failed with error code: %d' %
                (output, err_output, proc.returncode))

  input_info = {}
  found_basenames = set()
  needed = set()
  file_arch = None
  for line in output.splitlines(True):
    # Objdump should display the architecture first and then the dependencies
    # second for each file in the list.
    matched = FormatMatcher.match(line)
    if matched:
      filename = matched.group(1)
      file_arch = OBJDUMP_ARCH_MAP[matched.group(2)]
      # file_arch stays set even when the file is skipped, so that the NEEDED
      # lines following it are skipped too.
      if arch and file_arch != arch:
        continue
      found_basenames.add(os.path.basename(filename))
      input_info[filename] = file_arch
      continue

    matched = NeededMatcher.match(line)
    if matched:
      if arch and file_arch != arch:
        continue
      needed.add((matched.group(1), file_arch))

  # Every requested file must have shown up with an accepted architecture.
  for filename in files:
    if os.path.basename(filename) not in found_basenames:
      raise Error('Library not found [%s]: %s' % (arch, filename))

  return input_info, needed
199
200
def _FindLibsInPath(name, lib_path):
  '''Locate every copy of the library |name| inside the lib_path directories.

  Args:
    name: name of library to find
    lib_path: A list of paths to search for shared libraries.

  Returns:
    A list with one path per lib_path entry that contains |name|, in lib_path
    order.

  Raises:
    Error: if no match exists.  This is always the case for 'libc.so', which
        is never looked up.
  '''
  # The libc.so file in the glibc toolchain is actually a linker script
  # which references libc.so.<SHA1>.  This means the libc.so itself does
  # not end up in the NEEDED section for glibc, so it is never searched for.
  candidates = (os.path.join(dirname, name)
                for dirname in lib_path
                if name != 'libc.so')
  matches = [path for path in candidates if os.path.exists(path)]
  if matches:
    return matches
  raise Error('cannot find library %s' % name)
223
224
def _GetNeededStatic(main_files):
  '''Map each of main_files to its architecture, without any dependencies.

  Args:
    main_files: A list of files to report.

  Returns:
    A dict with key=filename and value=the first element of the
    elf.ParseElfHeader() result for that file.
  '''
  return {path: elf.ParseElfHeader(path)[0] for path in main_files}
231