#!/usr/bin/env python
# Copyright 2020 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Reports binary size metrics for LaCrOS build artifacts.

More information at //docs/speed/binary_size/metrics.md.
"""

import argparse
import collections
import contextlib
import json
import logging
import os
import subprocess
import sys
import tempfile


@contextlib.contextmanager
def _SysPath(path):
  """Library import context that temporarily prepends |path| to |sys.path|."""
  if path and path not in sys.path:
    sys.path.insert(0, path)
  else:
    path = None  # Indicates that |sys.path| is not modified.
  try:
    yield
  finally:
    if path:
      sys.path.pop(0)


DIR_SOURCE_ROOT = os.environ.get(
    'CHECKOUT_SOURCE_ROOT',
    os.path.abspath(
        os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)))

BUILD_COMMON_PATH = os.path.join(DIR_SOURCE_ROOT, 'build', 'util', 'lib',
                                 'common')

TRACING_PATH = os.path.join(DIR_SOURCE_ROOT, 'third_party', 'catapult',
                            'tracing')

EU_STRIP_PATH = os.path.join(DIR_SOURCE_ROOT, 'buildtools', 'third_party',
                             'eu-strip', 'bin', 'eu-strip')

with _SysPath(BUILD_COMMON_PATH):
  import perf_tests_results_helper  # pylint: disable=import-error

with _SysPath(TRACING_PATH):
  from tracing.value import convert_chart_json  # pylint: disable=import-error

_BASE_CHART = {
    'format_version': '0.1',
    'benchmark_name': 'resource_sizes',
    'benchmark_description': 'LaCrOS resource size information.',
    'trace_rerun_options': [],
    'charts': {}
}

# Keys for the per-file measurements returned by
# _get_catagorized_filesizes().
_KEY_RAW = 'raw'
_KEY_GZIPPED = 'gzipped'
_KEY_STRIPPED = 'stripped'
_KEY_STRIPPED_GZIPPED = 'stripped_then_gzipped'

# Every ELF file starts with this 4-byte magic signature. Must be bytes:
# files are read in binary mode, and in Python 3 bytes never compare equal
# to str.
_ELF_MAGIC = b'\x7fELF'


class _Group:
  """A group of build artifacts whose file sizes are summed and tracked.

  Build artifacts for size tracking fall under these categories:
  * File: A single file.
  * Group: A collection of files.
  * Dir: All files under a directory.

  Attributes:
    paths: A list of files or directories to be tracked together.
    title: The display name of the group.
    track_stripped: Whether to also track summed stripped ELF sizes.
    track_compressed: Whether to also track summed compressed sizes.
  """

  def __init__(self, paths, title, track_stripped=False,
               track_compressed=False):
    self.paths = paths
    self.title = title
    self.track_stripped = track_stripped
    self.track_compressed = track_compressed


# List of disjoint build artifact groups for size tracking. This list should
# be synced with the lacros-amd64-generic-binary-size-rel builder contents
# (specified in //infra/config/subprojects/chromium/ci.star) and the
# chromeos-amd64-generic-lacros-internal builder (specified in src-internal).
_TRACKED_GROUPS = [
    _Group(paths=['chrome'],
           title='File: chrome',
           track_stripped=True,
           track_compressed=True),
    _Group(paths=['chrome_crashpad_handler'],
           title='File: chrome_crashpad_handler'),
    _Group(paths=['icudtl.dat'], title='File: icudtl.dat'),
    _Group(paths=['nacl_helper'], title='File: nacl_helper'),
    _Group(paths=['nacl_irt_x86_64.nexe'], title='File: nacl_irt_x86_64.nexe'),
    _Group(paths=['resources.pak'], title='File: resources.pak'),
    _Group(paths=[
        'chrome_100_percent.pak', 'chrome_200_percent.pak', 'headless_lib.pak'
    ],
           title='Group: Other PAKs'),
    _Group(paths=['snapshot_blob.bin'], title='Group: Misc'),
    _Group(paths=['locales/'], title='Dir: locales'),
    _Group(paths=['swiftshader/'], title='Dir: swiftshader'),
    _Group(paths=['WidevineCdm/'], title='Dir: WidevineCdm'),
]


def _visit_paths(base_dir, paths):
  """Itemizes files specified by a list of paths.

  Args:
    base_dir: Base directory for all elements in |paths|.
    paths: A list of filenames or directory names to specify files whose
      sizes to be counted. Directories are recursed. There's no de-duping
      effort. Non-existing files or directories are ignored (with warning
      message).

  Yields:
    Full paths of files found.
  """
  for path in paths:
    full_path = os.path.join(base_dir, path)
    if os.path.exists(full_path):
      if os.path.isdir(full_path):
        for dirpath, _, filenames in os.walk(full_path):
          for filename in filenames:
            yield os.path.join(dirpath, filename)
      else:  # Assume is file.
        yield full_path
    else:
      # critical() (not warning()) so the message stands out in bot logs.
      logging.critical('Not found: %s', path)


def _is_probably_elf(filename):
  """Heuristically decides whether |filename| is ELF via magic signature."""
  with open(filename, 'rb') as fh:
    return fh.read(4) == _ELF_MAGIC


def _is_unstrippable_elf(filename):
  """Identifies known-unstrippable ELF files to denoise the system."""
  return filename.endswith(('.nexe', 'libwidevinecdm.so'))


def _get_filesize(filename):
  """Returns the size of a file, or 0 if file is not found."""
  try:
    return os.path.getsize(filename)
  except OSError:
    logging.critical('Failed to get size: %s', filename)
  return 0


def _get_gzipped_filesize(filename):
  """Returns the gzipped size of a file, or 0 on failure."""
  BUFFER_SIZE = 65536
  if not os.path.isfile(filename):
    return 0
  try:
    # Call gzip externally instead of using gzip package since it's > 2x
    # faster.
    cmd = ['gzip', '-c', filename]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    try:
      # Manually counting bytes instead of using len(p.communicate()[0]) to
      # avoid buffering the entire compressed data (can be ~100 MB).
      ret = 0
      while True:
        chunk = len(p.stdout.read(BUFFER_SIZE))
        if chunk == 0:
          break
        ret += chunk
    finally:
      # Close the pipe and reap the child to avoid fd leaks and zombies.
      p.stdout.close()
      p.wait()
    return ret
  except OSError:
    logging.critical('Failed to get gzipped size: %s', filename)
  return 0


def _get_catagorized_filesizes(filename):
  """Measures |filename| sizes under various transforms.

  Returns:
    A Counter (keyed by _KEY_* constants) that stores measured sizes.
  """
  sizes = collections.Counter()
  sizes[_KEY_RAW] = _get_filesize(filename)
  sizes[_KEY_GZIPPED] = _get_gzipped_filesize(filename)

  # Pre-assign values for non-ELF, or in case of failure for ELF.
  sizes[_KEY_STRIPPED] = sizes[_KEY_RAW]
  sizes[_KEY_STRIPPED_GZIPPED] = sizes[_KEY_GZIPPED]

  if _is_probably_elf(filename) and not _is_unstrippable_elf(filename):
    # Create the temp file before entering the try block so the finally
    # clause can never see an unassigned |temp_file|.
    fd, temp_file = tempfile.mkstemp()
    os.close(fd)
    try:
      cmd = [EU_STRIP_PATH, filename, '-o', temp_file]
      subprocess.check_output(cmd)
      sizes[_KEY_STRIPPED] = _get_filesize(temp_file)
      sizes[_KEY_STRIPPED_GZIPPED] = _get_gzipped_filesize(temp_file)
      if sizes[_KEY_STRIPPED] > sizes[_KEY_RAW]:
        # This weird case has been observed for libwidevinecdm.so.
        logging.critical('Stripping made things worse for %s', filename)
    except subprocess.CalledProcessError:
      logging.critical('Failed to strip file: %s', filename)
    finally:
      os.unlink(temp_file)
  return sizes


def _dump_chart_json(output_dir, chartjson):
  """Writes chart histogram to JSON files.

  Output files:
    results-chart.json contains the chart JSON.
    perf_results.json contains histogram JSON for Catapult.

  Args:
    output_dir: Directory to place the JSON files.
    chartjson: Source JSON data for output files.
  """
  results_path = os.path.join(output_dir, 'results-chart.json')
  logging.critical('Dumping chartjson to %s', results_path)
  with open(results_path, 'w') as json_file:
    json.dump(chartjson, json_file, indent=2)

  # We would ideally generate a histogram set directly instead of generating
  # chartjson then converting. However, perf_tests_results_helper is in
  # //build, which doesn't seem to have any precedent for depending on
  # anything in Catapult. This can probably be fixed, but since this doesn't
  # need to be super fast or anything, converting is a good enough solution
  # for the time being.
  histogram_result = convert_chart_json.ConvertChartJson(results_path)
  if histogram_result.returncode != 0:
    raise Exception('chartjson conversion failed with error: ' +
                    histogram_result.stdout)

  histogram_path = os.path.join(output_dir, 'perf_results.json')
  logging.critical('Dumping histograms to %s', histogram_path)
  with open(histogram_path, 'w') as json_file:
    json_file.write(histogram_result.stdout)


def _run_resource_sizes(args):
  """Main flow to extract and output size data."""
  chartjson = _BASE_CHART.copy()
  report_func = perf_tests_results_helper.ReportPerfResult
  total_sizes = collections.Counter()

  def report_sizes(sizes, title, track_stripped, track_compressed):
    """Emits chart entries for one group's summed sizes."""
    report_func(chart_data=chartjson,
                graph_title=title,
                trace_title='size',
                value=sizes[_KEY_RAW],
                units='bytes')

    if track_stripped:
      report_func(chart_data=chartjson,
                  graph_title=title + ' (Stripped)',
                  trace_title='size',
                  value=sizes[_KEY_STRIPPED],
                  units='bytes')

    if track_compressed:
      report_func(chart_data=chartjson,
                  graph_title=title + ' (Gzipped)',
                  trace_title='size',
                  value=sizes[_KEY_GZIPPED],
                  units='bytes')

    if track_stripped and track_compressed:
      report_func(chart_data=chartjson,
                  graph_title=title + ' (Stripped, Gzipped)',
                  trace_title='size',
                  value=sizes[_KEY_STRIPPED_GZIPPED],
                  units='bytes')

  for g in _TRACKED_GROUPS:
    sizes = sum(
        map(_get_catagorized_filesizes, _visit_paths(args.out_dir, g.paths)),
        collections.Counter())
    report_sizes(sizes, g.title, g.track_stripped, g.track_compressed)

    # Total compressed size is summed over individual compressed sizes,
    # instead of concatenating first, then compressing everything. This is
    # done for simplicity. It also gives a conservative size estimate
    # (assuming file metadata and overheads are negligible).
    total_sizes += sizes

  report_sizes(total_sizes, 'Total', True, True)

  _dump_chart_json(args.output_dir, chartjson)


def main():
  """Parses arguments and runs high level flows."""
  argparser = argparse.ArgumentParser(description='Writes LaCrOS size metrics.')

  argparser.add_argument('--chromium-output-directory',
                         dest='out_dir',
                         required=True,
                         type=os.path.realpath,
                         help='Location of the build artifacts.')

  output_group = argparser.add_mutually_exclusive_group()

  output_group.add_argument('--output-dir',
                            default='.',
                            help='Directory to save chartjson to.')

  # Accepted to conform to the isolated script interface, but ignored.
  argparser.add_argument('--isolated-script-test-filter',
                         help=argparse.SUPPRESS)
  argparser.add_argument('--isolated-script-test-perf-output',
                         type=os.path.realpath,
                         help=argparse.SUPPRESS)

  output_group.add_argument(
      '--isolated-script-test-output',
      type=os.path.realpath,
      help='File to which results will be written in the simplified JSON '
      'output format.')

  args = argparser.parse_args()

  isolated_script_output = {'valid': False, 'failures': []}
  if args.isolated_script_test_output:
    test_name = 'lacros_resource_sizes'
    args.output_dir = os.path.join(
        os.path.dirname(args.isolated_script_test_output), test_name)
    if not os.path.exists(args.output_dir):
      os.makedirs(args.output_dir)

  try:
    _run_resource_sizes(args)
    isolated_script_output = {'valid': True, 'failures': []}
  finally:
    # The isolated-script result files are written even on failure, with
    # 'valid': False, so the bot infrastructure can tell the run crashed.
    if args.isolated_script_test_output:
      results_path = os.path.join(args.output_dir, 'test_results.json')
      with open(results_path, 'w') as output_file:
        json.dump(isolated_script_output, output_file)
      with open(args.isolated_script_test_output, 'w') as output_file:
        json.dump(isolated_script_output, output_file)


if __name__ == '__main__':
  main()