1#!/usr/bin/env vpython
2# Copyright 2017 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Compares two orderfiles, from filenames or a commit.
7
8This shows some statistics about two orderfiles, possibly extracted from an
9updating commit made by the orderfile bot.
10"""
11
12from __future__ import print_function
13
14import argparse
15import collections
16import logging
17import os
18import subprocess
19import sys
20
21
def ParseOrderfile(filename):
  """Parses an orderfile into a list of symbols.

  Two formats are recognized, selected by the first non-blank line:
    - New (LLD) orderfiles hold one bare symbol name per line. Every entry is
      returned, in file order.
    - Legacy (Gold) orderfiles hold input section names such as
      '.text.hot.foo'. The symbol is the text following the last '.', and each
      symbol is returned once, at the position of its first occurrence.

  Args:
    filename: (str) Path to the orderfile.

  Returns:
    [str] List of symbols, in file order. Empty when the file has no entries.

  Raises:
    AssertionError: a new-style orderfile contains a '*' or '.text' entry.
  """
  with open(filename, 'r') as f:
    # A blank line carries no symbol. Dropping them up front also prevents a
    # leading blank line from hiding the format marker inspected below.
    lines = [entry for entry in (line.strip() for line in f) if entry]
  if not lines:
    return []

  # The (new) orderfiles that are oriented at the LLD linker contain only symbol
  # names (i.e. not prefixed with '.text'). The (old) orderfiles aimed at the
  # Gold linker were patched by duplicating symbols prefixed with '.text.hot.',
  # '.text.unlikely.' and '.text.', hence the appearance of '.text' on the first
  # symbol indicates such a legacy orderfile.
  if not lines[0].startswith('.text.'):
    for symbol_name in lines:
      assert symbol_name != '*' and symbol_name != '.text', (
          'Unexpected entry %r in a symbol-only orderfile' % symbol_name)
    return lines

  symbols = []
  already_seen = set()
  for entry in lines:
    # Keep only (input) section names, not symbol names (only rare special
    # symbols contain '.'). We could only keep symbols, but then some even
    # older orderfiles would not be parsed.
    if '.' not in entry:
      continue
    # Example: '.text.startup.BLA' -> 'BLA'. The '+ 1' steps over the '.'
    # itself, so the names are comparable with those of a new-style orderfile
    # and the '*' wildcard test below can actually match.
    symbol_name = entry[entry.rindex('.') + 1:]
    if not symbol_name or symbol_name == '*' or entry == '.text':
      continue
    if symbol_name in already_seen:
      continue
    already_seen.add(symbol_name)
    symbols.append(symbol_name)
  return symbols
62
63
def CommonSymbolsToOrder(symbols, common_symbols):
  """Ranks the symbols of |common_symbols| by their position in |symbols|.

  Args:
    symbols: Iterable of symbols, walked in iteration order.
    common_symbols: Container of the symbols to keep; the others are skipped
      and do not consume a rank.

  Returns:
    {symbol: rank} with ranks counted from 0 over the kept symbols only. A
    symbol met several times ends up with the rank of its last occurrence.
  """
  kept = (symbol for symbol in symbols if symbol in common_symbols)
  return {symbol: rank for rank, symbol in enumerate(kept)}
74
75
# Statistics describing the difference between two orderfiles: the symbol count
# of each file, how many symbols were added and removed going from the first to
# the second, and the normalized ordering distance over their shared symbols.
CompareResult = collections.namedtuple('CompareResult', [
    'first_count',
    'second_count',
    'new_count',
    'removed_count',
    'average_fractional_distance',
])
80
def _UniqueInOrder(symbols):
  """Returns |symbols| without repeats, keeping each first occurrence in place."""
  seen = set()
  unique = []
  for symbol in symbols:
    if symbol not in seen:
      seen.add(symbol)
      unique.append(symbol)
  return unique


def Compare(first_filename, second_filename):
  """Outputs a comparison of two orderfiles to stdout.

  Prints the number of parsed symbols in each file, the number of symbols only
  present in the second file (new) or only in the first (removed), and the
  average fractional distance between the positions of the shared symbols.

  Args:
    first_filename: (str) First orderfile.
    second_filename: (str) Second orderfile.

  Returns:
    An instance of CompareResult. The counts are numbers of distinct symbols,
    and average_fractional_distance is 0.0 when the files share no symbol.
  """
  first_symbols = ParseOrderfile(first_filename)
  second_symbols = ParseOrderfile(second_filename)
  print('Symbols count:\n\tfirst:\t%d\n\tsecond:\t%d' % (len(first_symbols),
                                                         len(second_symbols)))
  # The ordered lists must outlive the conversion to sets: sets answer the
  # membership questions, but only the lists carry the file order that the
  # distance measures. Repeats are dropped so that ranks stay dense.
  first_ordered = _UniqueInOrder(first_symbols)
  second_ordered = _UniqueInOrder(second_symbols)
  first_set = set(first_ordered)
  second_set = set(second_ordered)
  new_symbols = second_set - first_set
  removed_symbols = first_set - second_set
  common_symbols = first_set & second_set
  # Distance between orderfiles.
  first_to_ordering = CommonSymbolsToOrder(first_ordered, common_symbols)
  second_to_ordering = CommonSymbolsToOrder(second_ordered, common_symbols)
  total_distance = sum(abs(first_to_ordering[s] - second_to_ordering[s])
                       for s in common_symbols)
  # Each distance is in [0, len(common_symbols)] and there are
  # len(common_symbols) entries, hence the normalization. Without any shared
  # symbol there is nothing to measure, and the division would be by zero.
  if common_symbols:
    average_fractional_distance = (
        float(total_distance) / (len(common_symbols)**2))
  else:
    average_fractional_distance = 0.0
  print('New symbols = %d' % len(new_symbols))
  print('Removed symbols = %d' % len(removed_symbols))
  print('Average fractional distance = %.2f%%' %
        (100. * average_fractional_distance))
  return CompareResult(len(first_set), len(second_set),
                       len(new_symbols), len(removed_symbols),
                       average_fractional_distance)
115
116
def CheckOrderfileCommit(commit_hash, clank_path):
  """Asserts that a commit is an orderfile update from the bot.

  Runs 'git show' in |clank_path| and checks the "<author name> <subject>" line
  of the commit.

  Args:
    commit_hash: (str) Git hash of the orderfile roll commit.
    clank_path: (str) Path to the clank repository.

  Raises:
    AssertionError: the commit is not 'clank-autoroller Update Orderfile.'
      (compared case-insensitively).
    subprocess.CalledProcessError: the git command exits with an error.
  """
  # universal_newlines=True makes check_output() return text rather than bytes
  # on Python 3, where splitting bytes on a str separator raises TypeError.
  output = subprocess.check_output(
      ['git', 'show', r'--format=%an %s', commit_hash], cwd=clank_path,
      universal_newlines=True)
  first_line = output.split('\n')[0]
  # Capitalization changed at some point.
  assert first_line.upper() == 'clank-autoroller Update Orderfile.'.upper(), (
      'Not an orderfile commit')
130
131
def GetBeforeAfterOrderfileHashes(commit_hash, clank_path):
  """Reads the orderfile hashes before and after an orderfile roll.

  The hash is the first line of 'orderfiles/orderfile.arm.out.sha1', read with
  'git show' at the parent of the commit and at the commit itself.

  Args:
    commit_hash: (str) Git hash of the orderfile roll commit.
    clank_path: (str) Path to the clank repository.

  Returns:
    (str, str) Hashes of the orderfile before and after the commit.

  Raises:
    AssertionError: the commit does not change the orderfile hash.
    subprocess.CalledProcessError: a git command exits with an error.
  """
  orderfile_hash_relative_path = 'orderfiles/orderfile.arm.out.sha1'

  def ReadHashAt(revision):
    # universal_newlines=True makes check_output() return text rather than
    # bytes on Python 3, where splitting bytes on a str separator raises
    # TypeError.
    output = subprocess.check_output(
        ['git', 'show', '%s:%s' % (revision, orderfile_hash_relative_path)],
        cwd=clank_path, universal_newlines=True)
    return output.split('\n')[0]

  before_hash = ReadHashAt('%s^' % commit_hash)
  after_hash = ReadHashAt(commit_hash)
  assert before_hash != after_hash
  return (before_hash, after_hash)
153
154
def DownloadOrderfile(orderfile_hash, output_filename):
  """Fetches the orderfile stored under a given hash.

  Runs 'gsutil.py cp' from the clank-archive orderfile bucket.

  Args:
    orderfile_hash: (str) Hash naming the orderfile in cloud storage.
    output_filename: (str) Destination path of the copy.

  Raises:
    subprocess.CalledProcessError: the gsutil command exits with an error.
  """
  source_url = 'gs://clank-archive/orderfile-clankium/%s' % orderfile_hash
  command = ['gsutil.py', 'cp', source_url, output_filename]
  subprocess.check_call(command)
161
162
def GetOrderfilesFromCommit(commit_hash):
  """Downloads the orderfiles from before and after an orderfile roll commit.

  The commit is looked up in the 'clank' directory located two levels above
  this script, and the files are written to /tmp/, named after their hashes.

  Args:
    commit_hash: (str) Git hash of the orderfile roll commit.

  Returns:
    (str, str) Paths to the before and after orderfiles.
  """
  script_dir = os.path.dirname(__file__)
  clank_path = os.path.join(script_dir, os.pardir, os.pardir, 'clank')

  logging.info('Checking that the commit is an orderfile')
  CheckOrderfileCommit(commit_hash, clank_path)

  before_hash, after_hash = GetBeforeAfterOrderfileHashes(
      commit_hash, clank_path)
  hashes = (before_hash, after_hash)
  logging.info('Before / after hashes: %s %s', *hashes)

  filenames = tuple(os.path.join('/tmp/', h) for h in hashes)
  logging.info('Downloading files')
  for orderfile_hash, destination in zip(hashes, filenames):
    DownloadOrderfile(orderfile_hash, destination)
  return filenames
178
179
def CreateArgumentParser():
  """Builds the command-line parser of this script.

  Returns:
    An argparse.ArgumentParser accepting --first, --second, --keep,
    --from-commit and --csv-output.
  """
  # (flag, add_argument() keyword arguments), in the order they are registered.
  flags = (
      ('--first', {'help': 'First orderfile'}),
      ('--second', {'help': 'Second orderfile'}),
      ('--keep', {'default': False, 'action': 'store_true',
                  'help': 'Keep the downloaded orderfiles'}),
      ('--from-commit', {'help': 'Analyze the difference in the '
                                 'orderfile from an orderfile bot commit.'}),
      ('--csv-output', {'help': 'Appends the result to a CSV file.'}),
  )
  parser = argparse.ArgumentParser()
  for flag, options in flags:
    parser.add_argument(flag, **options)
  return parser
191
192
def main():
  """Runs the comparison selected on the command line.

  With --first and --second, compares the two given orderfiles. With
  --from-commit, downloads the orderfiles from before and after that commit,
  compares them, optionally appends the result to the --csv-output file, and
  removes the downloaded files unless --keep is given.

  Returns:
    True if a comparison was run, False if the arguments selected none.
  """
  logging.basicConfig(level=logging.INFO)
  parser = CreateArgumentParser()
  args = parser.parse_args()
  if args.first or args.second:
    # Reported through the parser rather than an assert: asserts are stripped
    # under -O, and a usage error is more helpful than a traceback.
    if not (args.first and args.second):
      parser.error('Need both files.')
    Compare(args.first, args.second)
    return True

  if not args.from_commit:
    # Nothing to do; say why instead of exiting silently with a failure code.
    parser.print_usage(sys.stderr)
    return False

  first, second = GetOrderfilesFromCommit(args.from_commit)
  try:
    logging.info('Comparing the orderfiles')
    result = Compare(first, second)
    if args.csv_output:
      with open(args.csv_output, 'a') as f:
        f.write('%s,%d,%d,%d,%d,%f\n' % tuple(
            [args.from_commit] + list(result)))
  finally:
    if not args.keep:
      os.remove(first)
      os.remove(second)
  return True
216
217
218if __name__ == '__main__':
219  sys.exit(0 if main() else 1)
220