1#!/usr/bin/env python
2# Copyright 2020 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Reports binary size metrics for LaCrOS build artifacts.
6
7More information at //docs/speed/binary_size/metrics.md.
8"""
9
10import argparse
11import collections
12import contextlib
13import json
14import logging
15import os
16import subprocess
17import sys
18import tempfile
19
20
21@contextlib.contextmanager
22def _SysPath(path):
23  """Library import context that temporarily appends |path| to |sys.path|."""
24  if path and path not in sys.path:
25    sys.path.insert(0, path)
26  else:
27    path = None  # Indicates that |sys.path| is not modified.
28  try:
29    yield
30  finally:
31    if path:
32      sys.path.pop(0)
33
34
# Root of the Chromium checkout. Overridable via CHECKOUT_SOURCE_ROOT (used on
# bots); defaults to two directories above this file.
DIR_SOURCE_ROOT = os.environ.get(
    'CHECKOUT_SOURCE_ROOT',
    os.path.abspath(
        os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)))

# //build/util/lib/common, which provides perf_tests_results_helper.
BUILD_COMMON_PATH = os.path.join(DIR_SOURCE_ROOT, 'build', 'util', 'lib',
                                 'common')

# Catapult's tracing library, used to convert chartjson into histograms.
TRACING_PATH = os.path.join(DIR_SOURCE_ROOT, 'third_party', 'catapult',
                            'tracing')

# The eu-strip binary used to strip ELF files before measuring stripped sizes.
EU_STRIP_PATH = os.path.join(DIR_SOURCE_ROOT, 'buildtools', 'third_party',
                             'eu-strip', 'bin', 'eu-strip')

with _SysPath(BUILD_COMMON_PATH):
  import perf_tests_results_helper  # pylint: disable=import-error

with _SysPath(TRACING_PATH):
  from tracing.value import convert_chart_json  # pylint: disable=import-error

# Skeleton of the chartjson document emitted by this script; per-artifact
# entries are added under 'charts' by _run_resource_sizes().
_BASE_CHART = {
    'format_version': '0.1',
    'benchmark_name': 'resource_sizes',
    'benchmark_description': 'LaCrOS resource size information.',
    'trace_rerun_options': [],
    'charts': {}
}

# Keys for the per-file size measurements stored in the Counter returned by
# _get_catagorized_filesizes().
_KEY_RAW = 'raw'
_KEY_GZIPPED = 'gzipped'
_KEY_STRIPPED = 'stripped'
_KEY_STRIPPED_GZIPPED = 'stripped_then_gzipped'
67
68
69class _Group:
70  """A group of build artifacts whose file sizes are summed and tracked.
71
72  Build artifacts for size tracking fall under these categories:
73  * File: A single file.
74  * Group: A collection of files.
75  * Dir: All files under a directory.
76
77  Attributes:
78    paths: A list of files or directories to be tracked together.
79    title: The display name of the group.
80    track_stripped: Whether to also track summed stripped ELF sizes.
81    track_compressed: Whether to also track summed compressed sizes.
82  """
83
84  def __init__(self, paths, title, track_stripped=False,
85               track_compressed=False):
86    self.paths = paths
87    self.title = title
88    self.track_stripped = track_stripped
89    self.track_compressed = track_compressed
90
91
# List of disjoint build artifact groups for size tracking. This list should be
# kept in sync with the lacros-amd64-generic-binary-size-rel builder contents
# (specified in //infra/config/subprojects/chromium/ci.star) and the
# chromeos-amd64-generic-lacros-internal builder (specified in src-internal).
_TRACKED_GROUPS = [
    _Group(paths=['chrome'],
           title='File: chrome',
           track_stripped=True,
           track_compressed=True),
    _Group(paths=['chrome_crashpad_handler'],
           title='File: chrome_crashpad_handler'),
    _Group(paths=['icudtl.dat'], title='File: icudtl.dat'),
    _Group(paths=['nacl_helper'], title='File: nacl_helper'),
    _Group(paths=['nacl_irt_x86_64.nexe'], title='File: nacl_irt_x86_64.nexe'),
    _Group(paths=['resources.pak'], title='File: resources.pak'),
    _Group(paths=[
        'chrome_100_percent.pak', 'chrome_200_percent.pak', 'headless_lib.pak'
    ],
           title='Group: Other PAKs'),
    _Group(paths=['snapshot_blob.bin'], title='Group: Misc'),
    _Group(paths=['locales/'], title='Dir: locales'),
    _Group(paths=['swiftshader/'], title='Dir: swiftshader'),
    _Group(paths=['WidevineCdm/'], title='Dir: WidevineCdm'),
]
116
117
118def _visit_paths(base_dir, paths):
119  """Itemizes files specified by a list of paths.
120
121  Args:
122    base_dir: Base directory for all elements in |paths|.
123    paths: A list of filenames or directory names to specify files whose sizes
124      to be counted. Directories are recursed. There's no de-duping effort.
125      Non-existing files or directories are ignored (with warning message).
126  """
127  for path in paths:
128    full_path = os.path.join(base_dir, path)
129    if os.path.exists(full_path):
130      if os.path.isdir(full_path):
131        for dirpath, _, filenames in os.walk(full_path):
132          for filename in filenames:
133            yield os.path.join(dirpath, filename)
134      else:  # Assume is file.
135        yield full_path
136    else:
137      logging.critical('Not found: %s', path)
138
139
140def _is_probably_elf(filename):
141  """Heuristically decides whether |filename| is ELF via magic signature."""
142  with open(filename, 'rb') as fh:
143    return fh.read(4) == '\x7FELF'
144
145
146def _is_unstrippable_elf(filename):
147  """Identifies known-unstrippable ELF files to denoise the system."""
148  return filename.endswith('.nexe') or filename.endswith('libwidevinecdm.so')
149
150
151def _get_filesize(filename):
152  """Returns the size of a file, or 0 if file is not found."""
153  try:
154    return os.path.getsize(filename)
155  except OSError:
156    logging.critical('Failed to get size: %s', filename)
157  return 0
158
159
160def _get_gzipped_filesize(filename):
161  """Returns the gzipped size of a file, or 0 if file is not found."""
162  BUFFER_SIZE = 65536
163  if not os.path.isfile(filename):
164    return 0
165  try:
166    # Call gzip externally instead of using gzip package since it's > 2x faster.
167    cmd = ['gzip', '-c', filename]
168    p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
169    # Manually counting bytes instead of using len(p.communicate()[0]) to avoid
170    # buffering the entire compressed data (can be ~100 MB).
171    ret = 0
172    while True:
173      chunk = len(p.stdout.read(BUFFER_SIZE))
174      if chunk == 0:
175        break
176      ret += chunk
177    return ret
178  except OSError:
179    logging.critical('Failed to get gzipped size: %s', filename)
180  return 0
181
182
def _get_catagorized_filesizes(filename):
  """Measures |filename| sizes under various transforms.

  Args:
    filename: Path of the file to measure.

  Returns: A Counter (keyed by _KEY_* constants) that stores measured sizes.
  """
  sizes = collections.Counter()
  sizes[_KEY_RAW] = _get_filesize(filename)
  sizes[_KEY_GZIPPED] = _get_gzipped_filesize(filename)

  # Pre-assign values for non-ELF, or in case of failure for ELF.
  sizes[_KEY_STRIPPED] = sizes[_KEY_RAW]
  sizes[_KEY_STRIPPED_GZIPPED] = sizes[_KEY_GZIPPED]

  if _is_probably_elf(filename) and not _is_unstrippable_elf(filename):
    # Create the scratch file before entering the try block so |temp_file| is
    # always bound when the finally clause runs. (Previously mkstemp() was
    # inside the try, so a failure there raised NameError in the finally.)
    fd, temp_file = tempfile.mkstemp()
    os.close(fd)
    try:
      cmd = [EU_STRIP_PATH, filename, '-o', temp_file]
      subprocess.check_output(cmd)
      sizes[_KEY_STRIPPED] = _get_filesize(temp_file)
      sizes[_KEY_STRIPPED_GZIPPED] = _get_gzipped_filesize(temp_file)
      if sizes[_KEY_STRIPPED] > sizes[_KEY_RAW]:
        # This weird case has been observed for libwidevinecdm.so.
        # Use lazy %-style args so formatting happens inside logging.
        logging.critical('Stripping made things worse for %s', filename)
    except subprocess.CalledProcessError:
      logging.critical('Failed to strip file: %s', filename)
    finally:
      os.unlink(temp_file)
  return sizes
212
213
def _dump_chart_json(output_dir, chartjson):
  """Writes chart histogram to JSON files.

  Output files:
    results-chart.json contains the chart JSON.
    perf_results.json contains histogram JSON for Catapult.

  Args:
    output_dir: Directory to place the JSON files.
    chartjson: Source JSON data for output files.
  """
  chart_path = os.path.join(output_dir, 'results-chart.json')
  logging.critical('Dumping chartjson to %s', chart_path)
  with open(chart_path, 'w') as fh:
    json.dump(chartjson, fh, indent=2)

  # Ideally we would emit a histogram set directly rather than generating
  # chartjson and converting. However, perf_tests_results_helper lives in
  # //build, which has no precedent for depending on Catapult. That could
  # probably be fixed, but since this path is not performance-critical,
  # converting is good enough for the time being.
  conversion = convert_chart_json.ConvertChartJson(chart_path)
  if conversion.returncode != 0:
    raise Exception('chartjson conversion failed with error: ' +
                    conversion.stdout)

  histogram_path = os.path.join(output_dir, 'perf_results.json')
  logging.critical('Dumping histograms to %s', histogram_path)
  with open(histogram_path, 'w') as fh:
    fh.write(conversion.stdout)
245
246
def _run_resource_sizes(args):
  """Main flow to extract and output size data.

  Args:
    args: Parsed arguments providing |out_dir| (build artifacts location) and
      |output_dir| (where result JSON is written).
  """
  chartjson = _BASE_CHART.copy()
  report_func = perf_tests_results_helper.ReportPerfResult
  total_sizes = collections.Counter()

  def report_sizes(sizes, title, track_stripped, track_compressed):
    # Each variant: (enabled, title suffix, key into |sizes|). Raw size is
    # always reported; the others depend on the group's tracking flags.
    variants = [
        (True, '', _KEY_RAW),
        (track_stripped, ' (Stripped)', _KEY_STRIPPED),
        (track_compressed, ' (Gzipped)', _KEY_GZIPPED),
        (track_stripped and track_compressed, ' (Stripped, Gzipped)',
         _KEY_STRIPPED_GZIPPED),
    ]
    for enabled, suffix, key in variants:
      if enabled:
        report_func(chart_data=chartjson,
                    graph_title=title + suffix,
                    trace_title='size',
                    value=sizes[key],
                    units='bytes')

  for group in _TRACKED_GROUPS:
    group_sizes = sum(
        map(_get_catagorized_filesizes,
            _visit_paths(args.out_dir, group.paths)), collections.Counter())
    report_sizes(group_sizes, group.title, group.track_stripped,
                 group.track_compressed)

    # Total compressed size is summed over individual compressed sizes,
    # instead of concatenating first, then compressing everything. This is
    # done for simplicity. It also gives a conservative size estimate
    # (assuming file metadata and overheads are negligible).
    total_sizes += group_sizes

  report_sizes(total_sizes, 'Total', True, True)

  _dump_chart_json(args.output_dir, chartjson)
296
297
def main():
  """Parses arguments and runs high level flows."""
  parser = argparse.ArgumentParser(description='Writes LaCrOS size metrics.')

  parser.add_argument('--chromium-output-directory',
                      dest='out_dir',
                      required=True,
                      type=os.path.realpath,
                      help='Location of the build artifacts.')

  # --output-dir and --isolated-script-test-output both determine where
  # results go, so they may not be combined.
  output_group = parser.add_mutually_exclusive_group()

  output_group.add_argument('--output-dir',
                            default='.',
                            help='Directory to save chartjson to.')

  # Accepted to conform to the isolated script interface, but ignored.
  parser.add_argument('--isolated-script-test-filter', help=argparse.SUPPRESS)
  parser.add_argument('--isolated-script-test-perf-output',
                      type=os.path.realpath,
                      help=argparse.SUPPRESS)

  output_group.add_argument(
      '--isolated-script-test-output',
      type=os.path.realpath,
      help='File to which results will be written in the simplified JSON '
      'output format.')

  args = parser.parse_args()

  # Assume failure until _run_resource_sizes() completes without raising.
  result_json = {'valid': False, 'failures': []}
  if args.isolated_script_test_output:
    test_name = 'lacros_resource_sizes'
    args.output_dir = os.path.join(
        os.path.dirname(args.isolated_script_test_output), test_name)
    if not os.path.exists(args.output_dir):
      os.makedirs(args.output_dir)

  try:
    _run_resource_sizes(args)
    result_json = {'valid': True, 'failures': []}
  finally:
    # Emit results even on failure so the isolated-script harness always
    # receives a report.
    if args.isolated_script_test_output:
      results_path = os.path.join(args.output_dir, 'test_results.json')
      with open(results_path, 'w') as output_file:
        json.dump(result_json, output_file)
      with open(args.isolated_script_test_output, 'w') as output_file:
        json.dump(result_json, output_file)
347
348
# Script entry point.
if __name__ == '__main__':
  main()
351