1"""report.py - Utilities for reporting statistics about benchmark results
2"""
3
4import unittest
5import os
6import re
7import copy
8import random
9
10from scipy.stats import mannwhitneyu
11
12
13class BenchmarkColor(object):
14    def __init__(self, name, code):
15        self.name = name
16        self.code = code
17
18    def __repr__(self):
19        return '%s%r' % (self.__class__.__name__,
20                         (self.name, self.code))
21
22    def __format__(self, format):
23        return self.code
24
25
26# Benchmark Colors Enumeration
BC_NONE = BenchmarkColor('NONE', '')
BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m')
BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')

UTEST_MIN_REPETITIONS = 2
UTEST_OPTIMAL_REPETITIONS = 9  # Lowest reasonable number; more is better.
UTEST_COL_NAME = "_pvalue"


def color_format(use_color, fmt_str, *args, **kwargs):
    """
    Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
    'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
    is False then all color codes in 'args' and 'kwargs' are replaced with
    the empty string.
    """
    assert use_color is True or use_color is False
    if not use_color:
        args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE
                for arg in args]
        kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
                  for key, arg in kwargs.items()}
    return fmt_str.format(*args, **kwargs)
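
# A small sketch of how the formatter degrades gracefully (illustrative
# values): with use_color=False, every BenchmarkColor argument is swapped
# for BC_NONE, whose escape code is the empty string.
#
#   >>> color_format(True, '{}hi{endc}', BC_FAIL, endc=BC_ENDC)
#   '\x1b[91mhi\x1b[0m'
#   >>> color_format(False, '{}hi{endc}', BC_FAIL, endc=BC_ENDC)
#   'hi'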


def find_longest_name(benchmark_list):
    """
    Return the length of the longest benchmark name in a given list of
    benchmark JSON objects.
    """
    longest_name = 1
    for bc in benchmark_list:
        if len(bc['name']) > longest_name:
            longest_name = len(bc['name'])
    return longest_name


def calculate_change(old_val, new_val):
    """
    Return a float representing the decimal change between old_val and
    new_val. If 'old_val' is zero, fall back to a symmetric relative change
    so that we do not divide by zero.
    """
    if old_val == 0 and new_val == 0:
        return 0.0
    if old_val == 0:
        return float(new_val - old_val) / (float(old_val + new_val) / 2)
    return float(new_val - old_val) / abs(old_val)
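
# Worked examples of the three branches above (hypothetical timings): a 10%
# slowdown, the symmetric fallback when old_val is zero, and the degenerate
# both-zero case.
#
#   >>> calculate_change(100, 110)
#   0.1
#   >>> calculate_change(0, 5)  # symmetric form: (5 - 0) / ((0 + 5) / 2)
#   2.0
#   >>> calculate_change(0, 0)
#   0.0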


def filter_benchmark(json_orig, family, replacement=""):
    """
    Apply a filter to the JSON, keeping only the 'family' of benchmarks and
    substituting occurrences of the 'family' regex in each kept name with
    'replacement'.
    """
    regex = re.compile(family)
    filtered = {}
    filtered['benchmarks'] = []
    for be in json_orig['benchmarks']:
        if not regex.search(be['name']):
            continue
        filteredbench = copy.deepcopy(be)  # Do NOT modify the old name!
        filteredbench['name'] = regex.sub(replacement, filteredbench['name'])
        filtered['benchmarks'].append(filteredbench)
    return filtered
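
# Usage sketch on a hypothetical two-benchmark input: keep only the
# 'BM_memcpy' family and shorten the surviving names via 'replacement'.
#
#   >>> orig = {'benchmarks': [{'name': 'BM_memcpy/8'},
#   ...                        {'name': 'BM_copy/8'}]}
#   >>> filter_benchmark(orig, 'BM_memcpy', 'memcpy')
#   {'benchmarks': [{'name': 'memcpy/8'}]}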


def get_unique_benchmark_names(json):
    """
    Return all the unique benchmark 'names', while *keeping* their order of
    first appearance.
    """
    seen = set()
    # seen.add() returns None, so the 'or True' keeps the filter truthy
    # while still recording every name as seen.
    uniqued = [x['name'] for x in json['benchmarks']
               if x['name'] not in seen and
               (seen.add(x['name']) or True)]
    return uniqued
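
# Order-preserving deduplication in action (synthetic names):
#
#   >>> get_unique_benchmark_names({'benchmarks': [
#   ...     {'name': 'b'}, {'name': 'a'}, {'name': 'b'}]})
#   ['b', 'a']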


def intersect(list1, list2):
    """
    Given two lists, return a new list consisting of the elements contained
    in *both* input lists, preserving the ordering of 'list1'.
    """
    return [x for x in list1 if x in list2]


def is_potentially_comparable_benchmark(x):
    return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x)


def partition_benchmarks(json1, json2):
    """
    While preserving the ordering, find benchmarks with the same names in
    both of the inputs, and group them.
    (i.e. partition/filter into groups with common name)
    """
    json1_unique_names = get_unique_benchmark_names(json1)
    json2_unique_names = get_unique_benchmark_names(json2)
    names = intersect(json1_unique_names, json2_unique_names)
    partitions = []
    for name in names:
        time_unit = None
        # Pick the time unit from the first entry of the lhs benchmark.
        # We should be careful not to crash with unexpected input.
        for x in json1['benchmarks']:
            if (x['name'] == name and is_potentially_comparable_benchmark(x)):
                time_unit = x['time_unit']
                break
        if time_unit is None:
            continue
        # Filter by name and time unit.
        # All the repetitions are assumed to be comparable.
        lhs = [x for x in json1['benchmarks'] if x['name'] == name and
               x['time_unit'] == time_unit]
        rhs = [x for x in json2['benchmarks'] if x['name'] == name and
               x['time_unit'] == time_unit]
        partitions.append([lhs, rhs])
    return partitions
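
# Sketch with synthetic runs: two repetitions of 'BM_One' on the left and
# one on the right collapse into a single [lhs, rhs] partition (the 'rep'
# helper below is purely illustrative).
#
#   >>> def rep(t):
#   ...     return {'name': 'BM_One', 'time_unit': 'ns',
#   ...             'real_time': t, 'cpu_time': t}
#   >>> parts = partition_benchmarks({'benchmarks': [rep(10), rep(11)]},
#   ...                              {'benchmarks': [rep(9)]})
#   >>> [(len(lhs), len(rhs)) for lhs, rhs in parts]
#   [(2, 1)]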


def extract_field(partition, field_name):
    # The count of elements may be different. We want *all* of them.
    lhs = [x[field_name] for x in partition[0]]
    rhs = [x[field_name] for x in partition[1]]
    return [lhs, rhs]
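
# The two sides of a partition may have different repetition counts; both
# lists are kept in full (synthetic partition):
#
#   >>> extract_field([[{'cpu_time': 90}],
#   ...                [{'cpu_time': 80}, {'cpu_time': 82}]], 'cpu_time')
#   [[90], [80, 82]]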


def calc_utest(timings_cpu, timings_time):
    min_rep_cnt = min(len(timings_time[0]),
                      len(timings_time[1]),
                      len(timings_cpu[0]),
                      len(timings_cpu[1]))

    # Does *everything* have at least UTEST_MIN_REPETITIONS repetitions?
    if min_rep_cnt < UTEST_MIN_REPETITIONS:
        return False, None, None

    time_pvalue = mannwhitneyu(
        timings_time[0], timings_time[1], alternative='two-sided').pvalue
    cpu_pvalue = mannwhitneyu(
        timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue

    return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue
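
# Hedged sketch with synthetic timings: two repetitions per side clear the
# UTEST_MIN_REPETITIONS bar, so p-values are computed, but the result is
# flagged as not having the optimal repetition count.
#
#   >>> have_opt, cpu_p, time_p = calc_utest([[90, 89], [88, 87]],
#   ...                                      [[10, 9], [8, 7]])
#   >>> have_opt, cpu_p is not None, time_p is not None
#   (False, True, True)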


def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True):
    def get_utest_color(pval):
        return BC_FAIL if pval >= utest_alpha else BC_OKGREEN

    # Bail out if we did not even get the minimum required repetitions
    # for the U test.
    if (not utest['have_optimal_repetitions'] and
            utest['cpu_pvalue'] is None and utest['time_pvalue'] is None):
        return []

    dsc = "U Test, Repetitions: {} vs {}".format(
        utest['nr_of_repetitions'], utest['nr_of_repetitions_other'])
    dsc_color = BC_OKGREEN

    # We still got some results to show, but issue a warning about it.
    if not utest['have_optimal_repetitions']:
        dsc_color = BC_WARNING
        dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
            UTEST_OPTIMAL_REPETITIONS)

    special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{}      {}"

    return [color_format(use_color,
                         special_str,
                         BC_HEADER,
                         "{}{}".format(bc_name, UTEST_COL_NAME),
                         first_col_width,
                         get_utest_color(
                             utest['time_pvalue']), utest['time_pvalue'],
                         get_utest_color(
                             utest['cpu_pvalue']), utest['cpu_pvalue'],
                         dsc_color, dsc,
                         endc=BC_ENDC)]
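
# Usage sketch with a hand-built (synthetic) utest dict; use_color=False
# keeps the single returned row free of escape codes.
#
#   >>> rows = print_utest('BM_One',
#   ...                    {'have_optimal_repetitions': False,
#   ...                     'cpu_pvalue': 0.01, 'time_pvalue': 0.02,
#   ...                     'nr_of_repetitions': 2,
#   ...                     'nr_of_repetitions_other': 2},
#   ...                    utest_alpha=0.05, first_col_width=20,
#   ...                    use_color=False)
#   >>> len(rows)
#   1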


def get_difference_report(
        json1,
        json2,
        utest=False):
    """
    Calculate and report the difference between each test of two benchmark
    runs specified as 'json1' and 'json2'. Output is another JSON containing
    relevant details for each test run.
    """
    assert utest is True or utest is False

    diff_report = []
    partitions = partition_benchmarks(json1, json2)
    for partition in partitions:
        benchmark_name = partition[0][0]['name']
        time_unit = partition[0][0]['time_unit']
        measurements = []
        utest_results = {}
        # Careful, we may have different repetition counts.
        for i in range(min(len(partition[0]), len(partition[1]))):
            bn = partition[0][i]
            other_bench = partition[1][i]
            measurements.append({
                'real_time': bn['real_time'],
                'cpu_time': bn['cpu_time'],
                'real_time_other': other_bench['real_time'],
                'cpu_time_other': other_bench['cpu_time'],
                'time': calculate_change(bn['real_time'], other_bench['real_time']),
                'cpu': calculate_change(bn['cpu_time'], other_bench['cpu_time'])
            })

        # After processing the whole partition, if requested, do the U test.
        if utest:
            timings_cpu = extract_field(partition, 'cpu_time')
            timings_time = extract_field(partition, 'real_time')
            have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(
                timings_cpu, timings_time)
            # Check against None rather than truthiness: a p-value of
            # exactly 0.0 is still a result we want to keep.
            if cpu_pvalue is not None and time_pvalue is not None:
                utest_results = {
                    'have_optimal_repetitions': have_optimal_repetitions,
                    'cpu_pvalue': cpu_pvalue,
                    'time_pvalue': time_pvalue,
                    'nr_of_repetitions': len(timings_cpu[0]),
                    'nr_of_repetitions_other': len(timings_cpu[1])
                }

        # Store only if we had any measurements for the given benchmark.
        # E.g. partition_benchmarks will filter out the benchmarks having
        # time units which are not compatible with other time units in the
        # benchmark suite.
        if measurements:
            run_type = partition[0][0].get('run_type', '')
            aggregate_name = (partition[0][0].get('aggregate_name', '')
                              if run_type == 'aggregate' else '')
            diff_report.append({
                'name': benchmark_name,
                'measurements': measurements,
                'time_unit': time_unit,
                'run_type': run_type,
                'aggregate_name': aggregate_name,
                'utest': utest_results
            })

    return diff_report
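
# Minimal usage sketch (synthetic runs; the 'run' helper is illustrative):
# one benchmark that got 2x slower between the two inputs yields a 'time'
# change of +1.0.
#
#   >>> def run(t):
#   ...     return {'benchmarks': [{'name': 'BM_One', 'time_unit': 'ns',
#   ...                             'real_time': t, 'cpu_time': t}]}
#   >>> report = get_difference_report(run(10), run(20))
#   >>> report[0]['name'], report[0]['measurements'][0]['time']
#   ('BM_One', 1.0)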


def print_difference_report(
        json_diff_report,
        include_aggregates_only=False,
        utest=False,
        utest_alpha=0.05,
        use_color=True):
    """
    Pretty-print the difference report previously produced by
    'get_difference_report', returning the output as a list of lines.
    """
    assert utest is True or utest is False

    def get_color(res):
        if res > 0.05:
            return BC_FAIL
        elif res > -0.07:
            return BC_WHITE
        else:
            return BC_CYAN

    first_col_width = find_longest_name(json_diff_report)
    first_col_width = max(
        first_col_width,
        len('Benchmark'))
    first_col_width += len(UTEST_COL_NAME)
    first_line = "{:<{}s}Time             CPU      Time Old      Time New       CPU Old       CPU New".format(
        'Benchmark', 12 + first_col_width)
    output_strs = [first_line, '-' * len(first_line)]

    fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
    for benchmark in json_diff_report:
        # If we were asked to include only aggregates and this benchmark
        # is not an aggregate, do not print it.
        if (not include_aggregates_only or
                'run_type' not in benchmark or
                benchmark['run_type'] == 'aggregate'):
            for measurement in benchmark['measurements']:
                output_strs += [color_format(use_color,
                                             fmt_str,
                                             BC_HEADER,
                                             benchmark['name'],
                                             first_col_width,
                                             get_color(measurement['time']),
                                             measurement['time'],
                                             get_color(measurement['cpu']),
                                             measurement['cpu'],
                                             measurement['real_time'],
                                             measurement['real_time_other'],
                                             measurement['cpu_time'],
                                             measurement['cpu_time_other'],
                                             endc=BC_ENDC)]

        # After processing the measurements, if requested and
        # if applicable (e.g. u-test exists for the given benchmark),
        # print the U test.
        if utest and benchmark['utest']:
            output_strs += print_utest(benchmark['name'],
                                       benchmark['utest'],
                                       utest_alpha=utest_alpha,
                                       first_col_width=first_col_width,
                                       use_color=use_color)

    return output_strs
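
# Continuing the synthetic 'report' built in the sketch above: the first
# returned line is the header row, followed by a dashed ruler and one line
# per measurement.
#
#   >>> lines = print_difference_report(report, use_color=False)
#   >>> lines[0].startswith('Benchmark')
#   True
#   >>> len(lines)  # header + ruler + one measurement
#   3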


###############################################################################
# Unit tests


class TestGetUniqueBenchmarkNames(unittest.TestCase):
    def load_results(self):
        import json
        testInputs = os.path.join(
            os.path.dirname(
                os.path.realpath(__file__)),
            'Inputs')
        testOutput = os.path.join(testInputs, 'test3_run0.json')
        with open(testOutput, 'r') as f:
            return json.load(f)

    def test_basic(self):
        expect_lines = [
            'BM_One',
            'BM_Two',
            'short',  # These two are not sorted
            'medium',  # These two are not sorted
        ]
        json = self.load_results()
        output_lines = get_unique_benchmark_names(json)
        print("\n")
        print("\n".join(output_lines))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            self.assertEqual(expect_lines[i], output_lines[i])


class TestReportDifference(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test1_run1.json')
            testOutput2 = os.path.join(testInputs, 'test1_run2.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'],
            ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'],
            ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'],
            ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'],
            ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'],
            ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'],
            ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'],
            ['BM_100xSlower', '+99.0000', '+99.0000',
                '100', '10000', '100', '10000'],
            ['BM_100xFaster', '-0.9900', '-0.9900',
                '10000', '100', '10000', '100'],
            ['BM_10PercentCPUToTime', '+0.1000',
                '-0.1000', '100', '110', '100', '90'],
            ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
            ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_output(self):
        expected_output = [
            {
                'name': 'BM_SameTimes',
                'measurements': [{'time': 0.0000, 'cpu': 0.0000, 'real_time': 10, 'real_time_other': 10, 'cpu_time': 10, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xFaster',
                'measurements': [{'time': -0.5000, 'cpu': -0.5000, 'real_time': 50, 'real_time_other': 25, 'cpu_time': 50, 'cpu_time_other': 25}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xSlower',
                'measurements': [{'time': 1.0000, 'cpu': 1.0000, 'real_time': 50, 'real_time_other': 100, 'cpu_time': 50, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentFaster',
                'measurements': [{'time': -0.0100, 'cpu': -0.0100, 'real_time': 100, 'real_time_other': 98.9999999, 'cpu_time': 100, 'cpu_time_other': 98.9999999}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentSlower',
                'measurements': [{'time': 0.0100, 'cpu': 0.0100, 'real_time': 100, 'real_time_other': 101, 'cpu_time': 100, 'cpu_time_other': 101}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentFaster',
                'measurements': [{'time': -0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 90, 'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentSlower',
                'measurements': [{'time': 0.1000, 'cpu': 0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 110}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xSlower',
                'measurements': [{'time': 99.0000, 'cpu': 99.0000, 'real_time': 100, 'real_time_other': 10000, 'cpu_time': 100, 'cpu_time_other': 10000}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xFaster',
                'measurements': [{'time': -0.9900, 'cpu': -0.9900, 'real_time': 10000, 'real_time_other': 100, 'cpu_time': 10000, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentCPUToTime',
                'measurements': [{'time': 0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_ThirdFaster',
                'measurements': [{'time': -0.3333, 'cpu': -0.3334, 'real_time': 100, 'real_time_other': 67, 'cpu_time': 100, 'cpu_time_other': 67}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_NotBadTimeUnit',
                'measurements': [{'time': -0.9000, 'cpu': 0.2000, 'real_time': 0.4, 'real_time_other': 0.04, 'cpu_time': 0.5, 'cpu_time_other': 0.6}],
                'time_unit': 's',
                'utest': {}
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceBetweenFamilies(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput = os.path.join(testInputs, 'test2_run.json')
            with open(testOutput, 'r') as f:
                return json.load(f)

        json = load_result()
        json1 = filter_benchmark(json, "BM_Z.ro", ".")
        json2 = filter_benchmark(json, "BM_O.e", ".")
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'],
            ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
            ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
            ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'.',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 10, 'real_time_other': 5, 'cpu_time': 10, 'cpu_time_other': 5}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'./4',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 40, 'real_time_other': 20, 'cpu_time': 40, 'cpu_time_other': 20}],
                'time_unit': 'ns',
                'utest': {},
            },
            {
                'name': u'Prefix/.',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 20, 'real_time_other': 10, 'cpu_time': 20, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'Prefix/./3',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 30, 'real_time_other': 15, 'cpu_time': 30, 'cpu_time_other': 15}],
                'time_unit': 'ns',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test3_run0.json')
            testOutput2 = os.path.join(testInputs, 'test3_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '0.6985',
             '0.6985',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.1489',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_pretty_printing_aggregates_only(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two_pvalue',
             '0.6985',
             '0.6985',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.1489',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, include_aggregates_only=True,
            utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'BM_One',
                'measurements': [
                    {'time': -0.1,
                     'cpu': 0.1,
                     'real_time': 10,
                     'real_time_other': 9,
                     'cpu_time': 100,
                     'cpu_time_other': 110}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'BM_Two',
                'measurements': [
                    {'time': 0.1111111111111111,
                     'cpu': -0.011111111111111112,
                     'real_time': 9,
                     'real_time_other': 10,
                     'cpu_time': 90,
                     'cpu_time_other': 89},
                    {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8,
                        'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.6985353583033387, 'time_pvalue': 0.6985353583033387
                }
            },
            {
                'name': u'short',
                'measurements': [
                    {'time': -0.125,
                     'cpu': -0.0625,
                     'real_time': 8,
                     'real_time_other': 7,
                     'cpu_time': 80,
                     'cpu_time_other': 75},
                    {'time': -0.4325,
                     'cpu': -0.13506493506493514,
                     'real_time': 8,
                     'real_time_other': 4.54,
                     'cpu_time': 77,
                     'cpu_time_other': 66.6}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.14891467317876572, 'time_pvalue': 0.7670968684102772
                }
            },
            {
                'name': u'medium',
                'measurements': [
                    {'time': -0.375,
                     'cpu': -0.3375,
                     'real_time': 8,
                     'real_time_other': 5,
                     'cpu_time': 80,
                     'cpu_time_other': 53}
                ],
                'time_unit': 'ns',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
        unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test3_run0.json')
            testOutput2 = os.path.join(testInputs, 'test3_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '0.6985',
             '0.6985',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.1489',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report,
            utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'BM_One',
                'measurements': [
                    {'time': -0.1,
                     'cpu': 0.1,
                     'real_time': 10,
                     'real_time_other': 9,
                     'cpu_time': 100,
                     'cpu_time_other': 110}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'BM_Two',
                'measurements': [
                    {'time': 0.1111111111111111,
                     'cpu': -0.011111111111111112,
                     'real_time': 9,
                     'real_time_other': 10,
                     'cpu_time': 90,
                     'cpu_time_other': 89},
                    {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8,
                        'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.6985353583033387, 'time_pvalue': 0.6985353583033387
                }
            },
            {
                'name': u'short',
                'measurements': [
                    {'time': -0.125,
                     'cpu': -0.0625,
                     'real_time': 8,
                     'real_time_other': 7,
                     'cpu_time': 80,
                     'cpu_time_other': 75},
                    {'time': -0.4325,
                     'cpu': -0.13506493506493514,
                     'real_time': 8,
                     'real_time_other': 4.54,
                     'cpu_time': 77,
                     'cpu_time_other': 66.6}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.14891467317876572, 'time_pvalue': 0.7670968684102772
                }
            },
            {
                'name': u'medium',
                'measurements': [
                    {'real_time_other': 5,
                     'cpu_time': 80,
                     'time': -0.375,
                     'real_time': 8,
                     'cpu_time_other': 53,
                     'cpu': -0.3375}
                ],
                'utest': {},
                'time_unit': u'ns',
                'aggregate_name': ''
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportSorting(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput = os.path.join(testInputs, 'test4_run.json')
            with open(testOutput, 'r') as f:
                return json.load(f)

        cls.json = load_result()

    def test_json_diff_report_pretty_printing(self):
        import util

        expected_names = [
            "99 family 0 instance 0 repetition 0",
            "98 family 0 instance 0 repetition 1",
            "97 family 0 instance 0 aggregate",
            "96 family 0 instance 1 repetition 0",
            "95 family 0 instance 1 repetition 1",
            "94 family 0 instance 1 aggregate",
            "93 family 1 instance 0 repetition 0",
            "92 family 1 instance 0 repetition 1",
            "91 family 1 instance 0 aggregate",
            "90 family 1 instance 1 repetition 0",
            "89 family 1 instance 1 repetition 1",
            "88 family 1 instance 1 aggregate"
        ]

        for n in range(len(self.json['benchmarks']) ** 2):
            random.shuffle(self.json['benchmarks'])
            sorted_benchmarks = util.sort_benchmark_results(self.json)[
                'benchmarks']
            self.assertEqual(len(expected_names), len(sorted_benchmarks))
            for out, expected in zip(sorted_benchmarks, expected_names):
                self.assertEqual(out['name'], expected)


def assert_utest(unittest_instance, lhs, rhs):
    if lhs['utest']:
        unittest_instance.assertAlmostEqual(
            lhs['utest']['cpu_pvalue'],
            rhs['utest']['cpu_pvalue'])
        unittest_instance.assertAlmostEqual(
            lhs['utest']['time_pvalue'],
            rhs['utest']['time_pvalue'])
        unittest_instance.assertEqual(
            lhs['utest']['have_optimal_repetitions'],
            rhs['utest']['have_optimal_repetitions'])
    else:
        # lhs['utest'] is empty; assert that rhs['utest'] is empty too.
        unittest_instance.assertEqual(lhs['utest'], rhs['utest'])


def assert_measurements(unittest_instance, lhs, rhs):
    for m1, m2 in zip(lhs['measurements'], rhs['measurements']):
        unittest_instance.assertEqual(m1['real_time'], m2['real_time'])
        unittest_instance.assertEqual(m1['cpu_time'], m2['cpu_time'])
        # m1['time'] and m1['cpu'] hold calculated values, so we must use
        # the almost-equal pattern when comparing them.
        unittest_instance.assertAlmostEqual(m1['time'], m2['time'], places=4)
        unittest_instance.assertAlmostEqual(m1['cpu'], m2['cpu'], places=4)


if __name__ == '__main__':
    unittest.main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;