1#!/usr/bin/env vpython 2# Copyright 2013 The Chromium Authors. All rights reserved. 3# Use of this source code is governed by a BSD-style license that can be 4# found in the LICENSE file. 5 6"""Patch an orderfile. 7 8Starting with a list of symbols in a binary and an orderfile (ordered list of 9symbols), matches the symbols in the orderfile and augments each symbol with 10the symbols residing at the same address (due to having identical code). The 11output is a list of symbols appropriate for the linker 12option --symbol-ordering-file for lld. Note this is not usable with gold (which 13uses section names to order the binary). 14 15Note: It is possible to have. 16- Several symbols mapping to the same offset in the binary. 17- Several offsets for a given symbol (because we strip the ".clone." and other 18 suffixes) 19 20The general pipeline is: 211. Get the symbol infos (name, offset, size, section) from the binary 222. Get the symbol names from the orderfile 233. Find the orderfile symbol names in the symbols coming from the binary 244. For each symbol found, get all the symbols at the same address 255. Output them to an updated orderfile suitable lld 26""" 27 28import argparse 29import collections 30import logging 31import re 32import sys 33 34import symbol_extractor 35 36# Suffixes for symbols. These are due to method splitting for inlining and 37# method cloning for various reasons including constant propagation and 38# inter-procedural optimization. 39_SUFFIXES = ('.clone.', '.part.', '.isra.', '.constprop.') 40 41 42# The pattern and format for a linker-generated outlined function. 43_OUTLINED_FUNCTION_RE = re.compile(r'OUTLINED_FUNCTION_(?P<index>\d+)$') 44_OUTLINED_FUNCTION_FORMAT = 'OUTLINED_FUNCTION_{}' 45 46def RemoveSuffixes(name): 47 """Strips method name suffixes from cloning and splitting. 48 49 .clone. comes from cloning in -O3. 50 .part. comes from partial method splitting for inlining. 51 .isra. comes from inter-procedural optimizations. 52 .constprop. is cloning for constant propagation. 53 """ 54 for suffix in _SUFFIXES: 55 name = name.split(suffix)[0] 56 return name 57 58 59def _UniqueGenerator(generator): 60 """Converts a generator to skip yielding elements already seen. 61 62 Example: 63 @_UniqueGenerator 64 def Foo(): 65 yield 1 66 yield 2 67 yield 1 68 yield 3 69 70 Foo() yields 1,2,3. 71 """ 72 def _FilteringFunction(*args, **kwargs): 73 returned = set() 74 for item in generator(*args, **kwargs): 75 if item in returned: 76 continue 77 returned.add(item) 78 yield item 79 80 return _FilteringFunction 81 82 83def _GroupSymbolsByOffset(binary_filename): 84 """Produce a map symbol name -> all symbol names at same offset. 85 86 Suffixes are stripped. 87 """ 88 symbol_infos = [ 89 s._replace(name=RemoveSuffixes(s.name)) 90 for s in symbol_extractor.SymbolInfosFromBinary(binary_filename)] 91 offset_map = symbol_extractor.GroupSymbolInfosByOffset(symbol_infos) 92 missing_offsets = 0 93 sym_to_matching = {} 94 for sym in symbol_infos: 95 if sym.offset not in offset_map: 96 missing_offsets += 1 97 continue 98 matching = [s.name for s in offset_map[sym.offset]] 99 assert sym.name in matching 100 sym_to_matching[sym.name] = matching 101 return sym_to_matching 102 103 104def _GetMaxOutlinedIndex(sym_dict): 105 """Find the largest index of an outlined functions. 106 107 See _OUTLINED_FUNCTION_RE for the definition of the index. In practice the 108 maximum index equals the total number of outlined functions. This function 109 asserts that the index is near the total number of outlined functions. 110 111 Args: 112 sym_dict: Dict with symbol names as keys. 113 114 Returns: 115 The largest index of an outlined function seen in the keys of |sym_dict|. 116 """ 117 seen = set() 118 for sym in sym_dict: 119 m = _OUTLINED_FUNCTION_RE.match(sym) 120 if m: 121 seen.add(int(m.group('index'))) 122 if not seen: 123 return None 124 max_index = max(seen) 125 # Assert that the number of outlined functions is reasonable compared to the 126 # indices we've seen. At the time of writing, outlined functions are indexed 127 # consecutively from 0. If this radically changes, then other outlining 128 # behavior may have changed to violate some assumptions. 129 assert max_index < 2 * len(seen) 130 return max_index 131 132 133def _StripSuffixes(section_list): 134 """Remove all suffixes on items in a list of symbols.""" 135 return [RemoveSuffixes(section) for section in section_list] 136 137 138def _PatchedSymbols(symbol_to_matching, profiled_symbols, max_outlined_index): 139 """Internal computation of an orderfile. 140 141 Args: 142 symbol_to_matching: ({symbol name -> [symbols at same offset]}), as from 143 _GroupSymbolsByOffset. 144 profiled_symbols: ([symbol names]) as from the unpatched orderfile. 145 max_outlined_index: (int or None) if not None, add outlined function names 146 to the end of the patched orderfile. 147 148 Yields: 149 Patched symbols, in a consistent order to profiled_symbols. 150 """ 151 missing_symbol_count = 0 152 seen_symbols = set() 153 for sym in profiled_symbols: 154 if _OUTLINED_FUNCTION_RE.match(sym): 155 continue 156 if sym in seen_symbols: 157 continue 158 if sym not in symbol_to_matching: 159 missing_symbol_count += 1 160 continue 161 for matching in symbol_to_matching[sym]: 162 if matching in seen_symbols: 163 continue 164 if _OUTLINED_FUNCTION_RE.match(matching): 165 continue 166 yield matching 167 seen_symbols.add(matching) 168 assert sym in seen_symbols 169 logging.warning('missing symbol count = %d', missing_symbol_count) 170 171 if max_outlined_index is not None: 172 # The number of outlined functions may change with each build, so only 173 # ordering the outlined functions currently in the binary will not 174 # guarantee ordering after code changes before the next orderfile is 175 # generated. So we double the number of outlined functions as a measure of 176 # security. 177 for idx in xrange(2 * max_outlined_index + 1): 178 yield _OUTLINED_FUNCTION_FORMAT.format(idx) 179 180 181@_UniqueGenerator 182def ReadOrderfile(orderfile): 183 """Reads an orderfile and cleans up symbols. 184 185 Args: 186 orderfile: The name of the orderfile. 187 188 Yields: 189 Symbol names, cleaned and unique. 190 """ 191 with open(orderfile) as f: 192 for line in f.xreadlines(): 193 line = line.strip() 194 if line: 195 yield line 196 197 198def GeneratePatchedOrderfile(unpatched_orderfile, native_lib_filename, 199 output_filename, order_outlined=False): 200 """Writes a patched orderfile. 201 202 Args: 203 unpatched_orderfile: (str) Path to the unpatched orderfile. 204 native_lib_filename: (str) Path to the native library. 205 output_filename: (str) Path to the patched orderfile. 206 order_outlined: (bool) If outlined function symbols are present in the 207 native library, then add ordering of them to the orderfile. If there 208 are no outlined function symbols present then this flag has no effect. 209 """ 210 symbol_to_matching = _GroupSymbolsByOffset(native_lib_filename) 211 if order_outlined: 212 max_outlined_index = _GetMaxOutlinedIndex(symbol_to_matching) 213 if not max_outlined_index: 214 # Only generate ordered outlined functions if they already appeared in 215 # the library. 216 max_outlined_index = None 217 else: 218 max_outlined_index = None # Ignore outlining. 219 profiled_symbols = ReadOrderfile(unpatched_orderfile) 220 221 with open(output_filename, 'w') as f: 222 # Make sure the anchor functions are located in the right place, here and 223 # after everything else. 224 # See the comment in //base/android/library_loader/anchor_functions.cc. 225 # 226 # __cxx_global_var_init is one of the largest symbols (~38kB as of May 227 # 2018), called extremely early, and not instrumented. 228 for first_section in ('dummy_function_start_of_ordered_text', 229 '__cxx_global_var_init'): 230 f.write(first_section + '\n') 231 232 for sym in _PatchedSymbols(symbol_to_matching, profiled_symbols, 233 max_outlined_index): 234 f.write(sym + '\n') 235 236 f.write('dummy_function_end_of_ordered_text\n') 237 238 239def _CreateArgumentParser(): 240 """Creates and returns the argument parser.""" 241 parser = argparse.ArgumentParser() 242 parser.add_argument('--target-arch', action='store', default='arm', 243 choices=['arm', 'arm64', 'x86', 'x86_64', 'x64', 'mips'], 244 help='The target architecture for the library.') 245 parser.add_argument('--unpatched-orderfile', required=True, 246 help='Path to the unpatched orderfile') 247 parser.add_argument('--native-library', required=True, 248 help='Path to the native library') 249 parser.add_argument('--output-file', required=True, help='Output filename') 250 return parser 251 252 253def main(): 254 parser = _CreateArgumentParser() 255 options = parser.parse_args() 256 symbol_extractor.SetArchitecture(options.target_arch) 257 GeneratePatchedOrderfile(options.unpatched_orderfile, options.native_library, 258 options.output_file) 259 return 0 260 261 262if __name__ == '__main__': 263 logging.basicConfig(level=logging.INFO) 264 sys.exit(main()) 265