#!/usr/bin/env python3
#
# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Computes the diff between two benchmark runs and outputs significant results."""

import argparse
import collections
import json
import os
import subprocess
import sys

sys.path.append(os.path.join(os.path.dirname(sys.argv[0]), '..'))

import bm_constants
import bm_json
import bm_speedup
import tabulate

verbose = False


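# Median of a list of samples; used below to compare the central value of the
# old and new measurements for each tracked field.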
def _median(ary):
    assert (len(ary))
    ary = sorted(ary)
    n = len(ary)
    if n % 2 == 0:
        return (ary[(n - 1) // 2] + ary[(n - 1) // 2 + 1]) / 2.0
    else:
        return ary[n // 2]


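# Command-line parsing. --old/--new select which result files to load, and
# --loops must match the loop count that was passed to bm_run.py.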
def _args():
    argp = argparse.ArgumentParser(
        description='Perform diff on microbenchmarks')
    argp.add_argument('-t',
                      '--track',
                      choices=sorted(bm_constants._INTERESTING),
                      nargs='+',
                      default=sorted(bm_constants._INTERESTING),
                      help='Which metrics to track')
    argp.add_argument('-b',
                      '--benchmarks',
                      nargs='+',
                      choices=bm_constants._AVAILABLE_BENCHMARK_TESTS,
                      default=bm_constants._AVAILABLE_BENCHMARK_TESTS,
                      help='Which benchmarks to run')
    argp.add_argument(
        '-l',
        '--loops',
        type=int,
        default=20,
        help=
        'Number of times to loop the benchmarks. Must match what was passed to bm_run.py'
    )
    argp.add_argument('-r',
                      '--regex',
                      type=str,
                      default="",
                      help='Regex to filter benchmarks run')
    argp.add_argument('--counters', dest='counters', action='store_true')
    argp.add_argument('--no-counters', dest='counters', action='store_false')
    argp.set_defaults(counters=True)
    argp.add_argument('-n', '--new', type=str, help='New benchmark name')
    argp.add_argument('-o', '--old', type=str, help='Old benchmark name')
    argp.add_argument('-v',
                      '--verbose',
                      action='store_true',
                      help='Print details of before/after')
    args = argp.parse_args()
    global verbose
    if args.verbose:
        verbose = True
    assert args.new
    assert args.old
    return args


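# Print helper that only emits output when --verbose is set.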
def _maybe_print(msg):
    if verbose:
        print(msg)


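# Collects per-field samples from both runs (new=True, old=False) for a single
# benchmark and decides which fields differ significantly.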
class Benchmark:

    def __init__(self):
        self.samples = {
            True: collections.defaultdict(list),
            False: collections.defaultdict(list)
        }
        self.final = {}

    def add_sample(self, track, data, new):
        for f in track:
            if f in data:
                self.samples[new][f].append(float(data[f]))

    def process(self, track, new_name, old_name):
        for f in sorted(track):
            new = self.samples[True][f]
            old = self.samples[False][f]
            if not new or not old:
                continue
            mdn_diff = abs(_median(new) - _median(old))
            _maybe_print('%s: %s=%r %s=%r mdn_diff=%r' %
                         (f, new_name, new, old_name, old, mdn_diff))
            s = bm_speedup.speedup(new, old, 1e-5)
            # Report a field only when the speedup magnitude exceeds 3 (shown
            # as a percentage) and the medians differ noticeably; fields whose
            # name contains 'trickle' skip the median-difference check.
            if abs(s) > 3:
                if mdn_diff > 0.5 or 'trickle' in f:
                    self.final[f] = '%+d%%' % s
        return self.final.keys()

    def skip(self):
        return not self.final

    def row(self, flds):
        return [self.final[f] if f in self.final else '' for f in flds]


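# Loads a single result file, tallying corrupt and missing files so they can be
# summarized in the final note.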
def _read_json(filename, badjson_files, nonexistant_files):
    stripped = ".".join(filename.split(".")[:-2])
    try:
        with open(filename) as f:
            r = f.read()
            return json.loads(r)
    except IOError as e:
        if stripped in nonexistant_files:
            nonexistant_files[stripped] += 1
        else:
            nonexistant_files[stripped] = 1
        return None
    except ValueError as e:
        print(r)
        if stripped in badjson_files:
            badjson_files[stripped] += 1
        else:
            badjson_files[stripped] = 1
        return None


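# Formats a {name: count} dict as an indented, one-entry-per-line block for the
# report's note section.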
def fmt_dict(d):
    return ''.join(["    " + k + ": " + str(d[k]) + "\n" for k in d])


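# Core comparison: for every benchmark and loop, load the JSON produced by the
# old and new builds, feed the samples into Benchmark objects, and tabulate the
# fields that changed significantly.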
def diff(bms, loops, regex, track, old, new, counters):
    benchmarks = collections.defaultdict(Benchmark)

    badjson_files = {}
    nonexistant_files = {}
    for bm in bms:
        for loop in range(0, loops):
            # List the tests in the old binary that match the regex, then load
            # the per-loop JSON results for each test from both runs.
            for line in subprocess.check_output([
                    'bm_diff_%s/opt/%s' % (old, bm), '--benchmark_list_tests',
                    '--benchmark_filter=%s' % regex
            ]).splitlines():
                line = line.decode('UTF-8')
                stripped_line = line.strip().replace("/", "_").replace(
                    "<", "_").replace(">", "_").replace(", ", "_")
                js_new_opt = _read_json(
                    '%s.%s.opt.%s.%d.json' % (bm, stripped_line, new, loop),
                    badjson_files, nonexistant_files)
                js_old_opt = _read_json(
                    '%s.%s.opt.%s.%d.json' % (bm, stripped_line, old, loop),
                    badjson_files, nonexistant_files)
                if counters:
                    js_new_ctr = _read_json(
                        '%s.%s.counters.%s.%d.json' %
                        (bm, stripped_line, new, loop), badjson_files,
                        nonexistant_files)
                    js_old_ctr = _read_json(
                        '%s.%s.counters.%s.%d.json' %
                        (bm, stripped_line, old, loop), badjson_files,
                        nonexistant_files)
                else:
                    js_new_ctr = None
                    js_old_ctr = None

                # Aggregate rows (_mean/_stddev) are skipped so that only raw
                # per-run samples are compared.
                for row in bm_json.expand_json(js_new_ctr, js_new_opt):
                    name = row['cpp_name']
                    if name.endswith('_mean') or name.endswith('_stddev'):
                        continue
                    benchmarks[name].add_sample(track, row, True)
                for row in bm_json.expand_json(js_old_ctr, js_old_opt):
                    name = row['cpp_name']
                    if name.endswith('_mean') or name.endswith('_stddev'):
                        continue
                    benchmarks[name].add_sample(track, row, False)

    really_interesting = set()
    for name, bm in benchmarks.items():
        _maybe_print(name)
        really_interesting.update(bm.process(track, new, old))
    fields = [f for f in track if f in really_interesting]

    headers = ['Benchmark'] + fields
    rows = []
    for name in sorted(benchmarks.keys()):
        if benchmarks[name].skip():
            continue
        rows.append([name] + benchmarks[name].row(fields))
    note = None
    if len(badjson_files):
        note = 'Corrupt JSON data (indicates timeout or crash): \n%s' % fmt_dict(
            badjson_files)
    if len(nonexistant_files):
        if note:
            note += '\n\nMissing files (indicates new benchmark): \n%s' % fmt_dict(
                nonexistant_files)
        else:
            note = 'Missing files (indicates new benchmark): \n%s' % fmt_dict(
                nonexistant_files)
    if rows:
        return tabulate.tabulate(rows, headers=headers, floatfmt='+.2f'), note
    else:
        return None, note


if __name__ == '__main__':
    args = _args()
    diff_table, note = diff(args.benchmarks, args.loops, args.regex,
                            args.track, args.old, args.new, args.counters)
    print('%s\n%s' %
          (note, diff_table if diff_table else "No performance differences"))