1#!/usr/bin/env vpython
2# Copyright 2013 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Patch an orderfile.
7
8Starting with a list of symbols in a binary and an orderfile (ordered list of
9symbols), matches the symbols in the orderfile and augments each symbol with
10the symbols residing at the same address (due to having identical code).  The
11output is a list of symbols appropriate for the linker
12option --symbol-ordering-file for lld. Note this is not usable with gold (which
13uses section names to order the binary).
14
15Note: It is possible to have.
16- Several symbols mapping to the same offset in the binary.
17- Several offsets for a given symbol (because we strip the ".clone." and other
18  suffixes)
19
20The general pipeline is:
211. Get the symbol infos (name, offset, size, section) from the binary
222. Get the symbol names from the orderfile
233. Find the orderfile symbol names in the symbols coming from the binary
244. For each symbol found, get all the symbols at the same address
255. Output them to an updated orderfile suitable lld
26"""
27
28import argparse
29import collections
30import logging
31import re
32import sys
33
34import symbol_extractor
35
36# Suffixes for symbols.  These are due to method splitting for inlining and
37# method cloning for various reasons including constant propagation and
38# inter-procedural optimization.
39_SUFFIXES = ('.clone.', '.part.', '.isra.', '.constprop.')
40
41
42# The pattern and format for a linker-generated outlined function.
43_OUTLINED_FUNCTION_RE = re.compile(r'OUTLINED_FUNCTION_(?P<index>\d+)$')
44_OUTLINED_FUNCTION_FORMAT = 'OUTLINED_FUNCTION_{}'
45
46def RemoveSuffixes(name):
47  """Strips method name suffixes from cloning and splitting.
48
49  .clone. comes from cloning in -O3.
50  .part.  comes from partial method splitting for inlining.
51  .isra.  comes from inter-procedural optimizations.
52  .constprop. is cloning for constant propagation.
53  """
54  for suffix in _SUFFIXES:
55    name = name.split(suffix)[0]
56  return name
57
58
59def _UniqueGenerator(generator):
60  """Converts a generator to skip yielding elements already seen.
61
62  Example:
63    @_UniqueGenerator
64    def Foo():
65      yield 1
66      yield 2
67      yield 1
68      yield 3
69
70    Foo() yields 1,2,3.
71  """
72  def _FilteringFunction(*args, **kwargs):
73    returned = set()
74    for item in generator(*args, **kwargs):
75      if item in returned:
76        continue
77      returned.add(item)
78      yield item
79
80  return _FilteringFunction
81
82
83def _GroupSymbolsByOffset(binary_filename):
84  """Produce a map symbol name -> all symbol names at same offset.
85
86  Suffixes are stripped.
87  """
88  symbol_infos = [
89      s._replace(name=RemoveSuffixes(s.name))
90      for s in symbol_extractor.SymbolInfosFromBinary(binary_filename)]
91  offset_map = symbol_extractor.GroupSymbolInfosByOffset(symbol_infos)
92  missing_offsets = 0
93  sym_to_matching = {}
94  for sym in symbol_infos:
95    if sym.offset not in offset_map:
96      missing_offsets += 1
97      continue
98    matching = [s.name for s in offset_map[sym.offset]]
99    assert sym.name in matching
100    sym_to_matching[sym.name] = matching
101  return sym_to_matching
102
103
104def _GetMaxOutlinedIndex(sym_dict):
105  """Find the largest index of an outlined functions.
106
107  See _OUTLINED_FUNCTION_RE for the definition of the index. In practice the
108  maximum index equals the total number of outlined functions. This function
109  asserts that the index is near the total number of outlined functions.
110
111  Args:
112    sym_dict: Dict with symbol names as keys.
113
114  Returns:
115    The largest index of an outlined function seen in the keys of |sym_dict|.
116  """
117  seen = set()
118  for sym in sym_dict:
119    m = _OUTLINED_FUNCTION_RE.match(sym)
120    if m:
121      seen.add(int(m.group('index')))
122  if not seen:
123    return None
124  max_index = max(seen)
125  # Assert that the number of outlined functions is reasonable compared to the
126  # indices we've seen. At the time of writing, outlined functions are indexed
127  # consecutively from 0. If this radically changes, then other outlining
128  # behavior may have changed to violate some assumptions.
129  assert max_index < 2 * len(seen)
130  return max_index
131
132
133def _StripSuffixes(section_list):
134  """Remove all suffixes on items in a list of symbols."""
135  return [RemoveSuffixes(section) for section in section_list]
136
137
138def _PatchedSymbols(symbol_to_matching, profiled_symbols, max_outlined_index):
139  """Internal computation of an orderfile.
140
141  Args:
142    symbol_to_matching: ({symbol name -> [symbols at same offset]}), as from
143      _GroupSymbolsByOffset.
144    profiled_symbols: ([symbol names]) as from the unpatched orderfile.
145    max_outlined_index: (int or None) if not None, add outlined function names
146      to the end of the patched orderfile.
147
148  Yields:
149    Patched symbols, in a consistent order to profiled_symbols.
150  """
151  missing_symbol_count = 0
152  seen_symbols = set()
153  for sym in profiled_symbols:
154    if _OUTLINED_FUNCTION_RE.match(sym):
155      continue
156    if sym in seen_symbols:
157      continue
158    if sym not in symbol_to_matching:
159      missing_symbol_count += 1
160      continue
161    for matching in symbol_to_matching[sym]:
162      if matching in seen_symbols:
163        continue
164      if _OUTLINED_FUNCTION_RE.match(matching):
165        continue
166      yield matching
167      seen_symbols.add(matching)
168    assert sym in seen_symbols
169  logging.warning('missing symbol count = %d', missing_symbol_count)
170
171  if max_outlined_index is not None:
172    # The number of outlined functions may change with each build, so only
173    # ordering the outlined functions currently in the binary will not
174    # guarantee ordering after code changes before the next orderfile is
175    # generated. So we double the number of outlined functions as a measure of
176    # security.
177    for idx in xrange(2 * max_outlined_index + 1):
178      yield _OUTLINED_FUNCTION_FORMAT.format(idx)
179
180
181@_UniqueGenerator
182def ReadOrderfile(orderfile):
183  """Reads an orderfile and cleans up symbols.
184
185  Args:
186    orderfile: The name of the orderfile.
187
188  Yields:
189    Symbol names, cleaned and unique.
190  """
191  with open(orderfile) as f:
192    for line in f.xreadlines():
193      line = line.strip()
194      if line:
195        yield line
196
197
198def GeneratePatchedOrderfile(unpatched_orderfile, native_lib_filename,
199                             output_filename, order_outlined=False):
200  """Writes a patched orderfile.
201
202  Args:
203    unpatched_orderfile: (str) Path to the unpatched orderfile.
204    native_lib_filename: (str) Path to the native library.
205    output_filename: (str) Path to the patched orderfile.
206    order_outlined: (bool) If outlined function symbols are present in the
207      native library, then add ordering of them to the orderfile. If there
208      are no outlined function symbols present then this flag has no effect.
209  """
210  symbol_to_matching = _GroupSymbolsByOffset(native_lib_filename)
211  if order_outlined:
212    max_outlined_index = _GetMaxOutlinedIndex(symbol_to_matching)
213    if not max_outlined_index:
214      # Only generate ordered outlined functions if they already appeared in
215      # the library.
216      max_outlined_index = None
217  else:
218    max_outlined_index = None  # Ignore outlining.
219  profiled_symbols = ReadOrderfile(unpatched_orderfile)
220
221  with open(output_filename, 'w') as f:
222    # Make sure the anchor functions are located in the right place, here and
223    # after everything else.
224    # See the comment in //base/android/library_loader/anchor_functions.cc.
225    #
226    # __cxx_global_var_init is one of the largest symbols (~38kB as of May
227    # 2018), called extremely early, and not instrumented.
228    for first_section in ('dummy_function_start_of_ordered_text',
229                          '__cxx_global_var_init'):
230      f.write(first_section + '\n')
231
232    for sym in _PatchedSymbols(symbol_to_matching, profiled_symbols,
233                               max_outlined_index):
234      f.write(sym + '\n')
235
236    f.write('dummy_function_end_of_ordered_text\n')
237
238
239def _CreateArgumentParser():
240  """Creates and returns the argument parser."""
241  parser = argparse.ArgumentParser()
242  parser.add_argument('--target-arch', action='store', default='arm',
243                      choices=['arm', 'arm64', 'x86', 'x86_64', 'x64', 'mips'],
244                      help='The target architecture for the library.')
245  parser.add_argument('--unpatched-orderfile', required=True,
246                      help='Path to the unpatched orderfile')
247  parser.add_argument('--native-library', required=True,
248                      help='Path to the native library')
249  parser.add_argument('--output-file', required=True, help='Output filename')
250  return parser
251
252
253def main():
254  parser = _CreateArgumentParser()
255  options = parser.parse_args()
256  symbol_extractor.SetArchitecture(options.target_arch)
257  GeneratePatchedOrderfile(options.unpatched_orderfile, options.native_library,
258                           options.output_file)
259  return 0
260
261
262if __name__ == '__main__':
263  logging.basicConfig(level=logging.INFO)
264  sys.exit(main())
265