"""report.py - Utilities for reporting statistics about benchmark results."""
4import unittest
5import os
6import re
7import copy
8import random
10from scipy.stats import mannwhitneyu
class BenchmarkColor(object):
    """A named color whose str.format() rendering is its stored code."""

    def __init__(self, name, code):
        # 'name' is a label used by repr(); 'code' is the text produced when
        # the instance is interpolated by str.format().
        self.code = code
        self.name = name

    def __repr__(self):
        # Renders as e.g. BenchmarkColor('NONE', '').
        fields = (self.name, self.code)
        return '{}{!r}'.format(type(self).__name__, fields)

    def __format__(self, format):
        # The format spec is ignored; the stored code is returned as-is.
        return self.code
# Benchmark Colors Enumeration
# Each constant pairs a label with the terminal escape sequence that
# BenchmarkColor.__format__ returns when the constant is interpolated by
# str.format(). BC_NONE carries an empty code; color_format() substitutes it
# for every color when coloring is disabled. BC_ENDC is the value passed as
# the 'endc' keyword to the report format strings in this module.
BC_NONE = BenchmarkColor('NONE', '')
BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m')
BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')
# Thresholds and naming used by the Mann-Whitney U test support below.
# calc_utest() skips the test entirely when any sample has fewer than
# UTEST_MIN_REPETITIONS values, and flags the result as unreliable when any
# sample has fewer than UTEST_OPTIMAL_REPETITIONS values.
UTEST_MIN_REPETITIONS = 2
UTEST_OPTIMAL_REPETITIONS = 9  # Lowest reasonable number, More is better.
# Suffix appended to the benchmark name on the U test output line.
UTEST_COL_NAME = "_pvalue"
def color_format(use_color, fmt_str, *args, **kwargs):
    """
    Format 'fmt_str' with 'args' and 'kwargs', optionally stripping colors.

    When 'use_color' is False, every BenchmarkColor found among 'args' and
    'kwargs' is swapped for BC_NONE before formatting, so it contributes the
    empty string. 'use_color' must be exactly True or False.
    """
    assert use_color is True or use_color is False

    def neutralize(value):
        # Only color objects are replaced; everything else passes through.
        if isinstance(value, BenchmarkColor):
            return BC_NONE
        return value

    if use_color:
        return fmt_str.format(*args, **kwargs)

    plain_args = [neutralize(value) for value in args]
    plain_kwargs = {key: neutralize(value) for key, value in kwargs.items()}
    return fmt_str.format(*plain_args, **plain_kwargs)
def find_longest_name(benchmark_list):
    """
    Return the length of the longest 'name' among the given benchmark JSON
    objects. The result is never less than 1, which is also what an empty
    list yields.
    """
    name_lengths = [len(entry['name']) for entry in benchmark_list]
    # Seeding with 1 provides the lower bound.
    return max([1] + name_lengths)
def calculate_change(old_val, new_val):
    """
    Return the change from old_val to new_val as a float fraction of old_val.

    Two zeros give 0.0. When only old_val is zero, the difference is divided
    by the mean of the two values instead, to avoid dividing by zero.
    """
    delta = float(new_val - old_val)
    if old_val != 0:
        return delta / abs(old_val)
    if new_val == 0:
        return 0.0
    midpoint = float(old_val + new_val) / 2
    return delta / midpoint
def filter_benchmark(json_orig, family, replacement=""):
    """
    Keep only the benchmarks whose name matches the regex 'family'.

    Returns a new dict holding just a 'benchmarks' list. Each kept entry is
    a deep copy whose name has every match of 'family' replaced with
    'replacement'; 'json_orig' itself is left untouched.
    """
    pattern = re.compile(family)
    kept = []
    for entry in json_orig['benchmarks']:
        if pattern.search(entry['name']):
            # Work on a copy so the caller's entry keeps its original name.
            clone = copy.deepcopy(entry)
            clone['name'] = pattern.sub(replacement, clone['name'])
            kept.append(clone)
    return {'benchmarks': kept}
def get_unique_benchmark_names(json):
    """
    Return each distinct benchmark 'name' exactly once, in order of first
    appearance in json['benchmarks'].
    """
    already_seen = set()
    ordered_names = []
    for entry in json['benchmarks']:
        name = entry['name']
        if name in already_seen:
            continue
        already_seen.add(name)
        ordered_names.append(name)
    return ordered_names
def intersect(list1, list2):
    """
    Return a new list with the elements of 'list1' that also occur in
    'list2', keeping the order (and any repeats) of 'list1'.
    """
    common = []
    for item in list1:
        if item in list2:
            common.append(item)
    return common
def is_potentially_comparable_benchmark(x):
    """
    Tell whether entry 'x' has all of the 'time_unit', 'real_time' and
    'cpu_time' keys.
    """
    required_keys = ('time_unit', 'real_time', 'cpu_time')
    return all(key in x for key in required_keys)
def partition_benchmarks(json1, json2):
    """
    While preserving the ordering, find benchmarks with the same names in
    both of the inputs, and group them.
    (i.e. partition/filter into groups with common name)

    Returns a list of [lhs, rhs] pairs, one per common name, where 'lhs'
    holds the matching entries of json1 and 'rhs' those of json2. Only
    entries that carry 'time_unit', 'real_time' and 'cpu_time' and whose
    time unit equals that of the first such json1 entry are included. Names
    with no such json1 entry are skipped altogether.
    """
    json1_unique_names = get_unique_benchmark_names(json1)
    json2_unique_names = get_unique_benchmark_names(json2)
    names = intersect(json1_unique_names, json2_unique_names)

    def select(benchmarks, name, time_unit):
        # The comparability check comes before the 'time_unit' lookup so
        # that entries lacking the timing keys are dropped instead of
        # raising KeyError.
        return [x for x in benchmarks
                if x['name'] == name and
                is_potentially_comparable_benchmark(x) and
                x['time_unit'] == time_unit]

    partitions = []
    for name in names:
        time_unit = None
        # Pick the time unit from the first entry of the lhs benchmark.
        # We should be careful not to crash with unexpected input.
        for x in json1['benchmarks']:
            if x['name'] == name and is_potentially_comparable_benchmark(x):
                time_unit = x['time_unit']
                break
        if time_unit is None:
            continue
        # Filter by name and time unit.
        # All the repetitions are assumed to be comparable.
        lhs = select(json1['benchmarks'], name, time_unit)
        rhs = select(json2['benchmarks'], name, time_unit)
        partitions.append([lhs, rhs])
    return partitions
def extract_field(partition, field_name):
    """
    Collect 'field_name' from every entry on each side of 'partition' and
    return the two value lists as [lhs, rhs].
    """
    # The two sides may differ in length; every entry is kept.
    columns = []
    for side_index in (0, 1):
        columns.append([entry[field_name] for entry in partition[side_index]])
    return columns
def calc_utest(timings_cpu, timings_time):
    """
    Run a two-sided Mann-Whitney U test on the cpu and real-time samples.

    Each argument is a [lhs, rhs] pair of sample lists. Returns a tuple
    (have_optimal_repetitions, cpu_pvalue, time_pvalue). When the smallest
    of the four samples has fewer than UTEST_MIN_REPETITIONS values, no test
    is run and (False, None, None) is returned.
    """
    samples = (timings_time[0], timings_time[1],
               timings_cpu[0], timings_cpu[1])
    min_rep_cnt = min(len(sample) for sample in samples)

    # Does every sample have at least UTEST_MIN_REPETITIONS repetitions?
    if min_rep_cnt < UTEST_MIN_REPETITIONS:
        return False, None, None

    def two_sided_pvalue(lhs, rhs):
        return mannwhitneyu(lhs, rhs, alternative='two-sided').pvalue

    time_pvalue = two_sided_pvalue(timings_time[0], timings_time[1])
    cpu_pvalue = two_sided_pvalue(timings_cpu[0], timings_cpu[1])

    have_optimal_repetitions = min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS
    return have_optimal_repetitions, cpu_pvalue, time_pvalue
def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True):
    """
    Format the U test line for one benchmark.

    'utest' is a dict with the keys 'have_optimal_repetitions',
    'cpu_pvalue', 'time_pvalue', 'nr_of_repetitions' and
    'nr_of_repetitions_other'. Returns a list holding one formatted line,
    or an empty list when the repetitions are not optimal and both p-values
    are None. Each p-value is colored BC_FAIL when it is >= 'utest_alpha'
    and BC_OKGREEN otherwise.
    """
    def get_utest_color(pval):
        return BC_FAIL if pval >= utest_alpha else BC_OKGREEN

    # Check if we failed miserably with minimum required repetitions for utest
    if (not utest['have_optimal_repetitions'] and
            utest['cpu_pvalue'] is None and
            utest['time_pvalue'] is None):
        return []

    dsc = "U Test, Repetitions: {} vs {}".format(
        utest['nr_of_repetitions'], utest['nr_of_repetitions_other'])
    dsc_color = BC_OKGREEN

    # We still got some results to show but issue a warning about it.
    if not utest['have_optimal_repetitions']:
        dsc_color = BC_WARNING
        dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
            UTEST_OPTIMAL_REPETITIONS)

    # Column order: name, time p-value, cpu p-value, description.
    special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{}      {}"

    return [color_format(use_color,
                         special_str,
                         BC_HEADER,
                         "{}{}".format(bc_name, UTEST_COL_NAME),
                         first_col_width,
                         get_utest_color(
                             utest['time_pvalue']), utest['time_pvalue'],
                         get_utest_color(
                             utest['cpu_pvalue']), utest['cpu_pvalue'],
                         dsc_color, dsc,
                         endc=BC_ENDC)]
def get_difference_report(
        json1,
        json2,
        utest=False):
    """
    Calculate and report the difference between each test of two benchmarks
    runs specified as 'json1' and 'json2'. Output is another json containing
    relevant details for each test run.

    The result is a list with one dict per benchmark name common to both
    inputs, holding 'name', 'measurements', 'time_unit', 'run_type',
    'aggregate_name' and 'utest'. 'measurements' pairs up the repetitions of
    both runs in order and stops at the shorter side. 'utest' is an empty
    dict unless 'utest' is True and calc_utest() produced p-values.
    'utest' must be exactly True or False.
    """
    assert utest is True or utest is False

    diff_report = []
    for partition in partition_benchmarks(json1, json2):
        lhs, rhs = partition[0], partition[1]
        first = lhs[0]
        benchmark_name = first['name']
        time_unit = first['time_unit']

        # Careful, we may have different repetition count; zip() stops at
        # the shorter of the two sides.
        measurements = [{
            'real_time': bn['real_time'],
            'cpu_time': bn['cpu_time'],
            'real_time_other': other_bench['real_time'],
            'cpu_time_other': other_bench['cpu_time'],
            'time': calculate_change(bn['real_time'], other_bench['real_time']),
            'cpu': calculate_change(bn['cpu_time'], other_bench['cpu_time'])
        } for bn, other_bench in zip(lhs, rhs)]

        # After processing the whole partition, if requested, do the U test.
        utest_results = {}
        if utest:
            timings_cpu = extract_field(partition, 'cpu_time')
            timings_time = extract_field(partition, 'real_time')
            have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(
                timings_cpu, timings_time)
            # Compare against None explicitly: a p-value of 0.0 is a valid
            # result and must not be discarded by a truthiness test.
            if cpu_pvalue is not None and time_pvalue is not None:
                utest_results = {
                    'have_optimal_repetitions': have_optimal_repetitions,
                    'cpu_pvalue': cpu_pvalue,
                    'time_pvalue': time_pvalue,
                    'nr_of_repetitions': len(timings_cpu[0]),
                    'nr_of_repetitions_other': len(timings_cpu[1])
                }

        # Store only if we had any measurements for given benchmark.
        # E.g. partition_benchmarks will filter out the benchmarks having
        # time units which are not compatible with other time units in the
        # benchmark suite.
        if measurements:
            run_type = first['run_type'] if 'run_type' in first else ''
            if run_type == 'aggregate' and 'aggregate_name' in first:
                aggregate_name = first['aggregate_name']
            else:
                aggregate_name = ''
            diff_report.append({
                'name': benchmark_name,
                'measurements': measurements,
                'time_unit': time_unit,
                'run_type': run_type,
                'aggregate_name': aggregate_name,
                'utest': utest_results
            })

    return diff_report
def print_difference_report(
        json_diff_report,
        include_aggregates_only=False,
        utest=False,
        utest_alpha=0.05,
        use_color=True):
    """
    Render a difference report as a list of text lines.

    'json_diff_report' is a list of per-benchmark dicts with 'name',
    'measurements' and 'utest' entries (and optionally 'run_type'). The
    result starts with a header line and a dashed separator, followed by one
    line per measurement and, when 'utest' is True and the benchmark has
    U test results, the line(s) produced by print_utest().
    'utest' must be exactly True or False.
    """
    assert utest is True or utest is False

    def get_color(res):
        # Above +0.05: BC_FAIL; at or below -0.07: BC_CYAN; else BC_WHITE.
        if res > 0.05:
            return BC_FAIL
        if res > -0.07:
            return BC_WHITE
        return BC_CYAN

    name_width = max(find_longest_name(json_diff_report), len('Benchmark'))
    # Leave room for the suffix that the U test line adds to the name.
    first_col_width = name_width + len(UTEST_COL_NAME)

    header = "{:<{}s}Time             CPU      Time Old      Time New       CPU Old       CPU New".format(
        'Benchmark', 12 + first_col_width)
    output_strs = [header, '-' * len(header)]

    fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
    for benchmark in json_diff_report:
        # Measurements are hidden only when aggregates-only output was asked
        # for and the entry declares a run_type other than 'aggregate'.
        show_measurements = (
            not include_aggregates_only
            or 'run_type' not in benchmark
            or benchmark['run_type'] == 'aggregate')
        if show_measurements:
            for measurement in benchmark['measurements']:
                time_change = measurement['time']
                cpu_change = measurement['cpu']
                output_strs.append(color_format(
                    use_color,
                    fmt_str,
                    BC_HEADER,
                    benchmark['name'],
                    first_col_width,
                    get_color(time_change),
                    time_change,
                    get_color(cpu_change),
                    cpu_change,
                    measurement['real_time'],
                    measurement['real_time_other'],
                    measurement['cpu_time'],
                    measurement['cpu_time_other'],
                    endc=BC_ENDC))

        # The U test line is independent of the aggregates-only filter: it
        # is emitted whenever it was requested and results exist.
        if utest and benchmark['utest']:
            output_strs.extend(print_utest(
                benchmark['name'],
                benchmark['utest'],
                utest_alpha=utest_alpha,
                first_col_width=first_col_width,
                use_color=use_color))

    return output_strs
338# Unit tests
class TestGetUniqueBenchmarkNames(unittest.TestCase):
    """Checks get_unique_benchmark_names() against Inputs/test3_run0.json."""

    def load_results(self):
        # The module is imported under an alias so that the parsed document
        # does not have to reuse the module's name.
        import json as json_module
        this_dir = os.path.dirname(os.path.realpath(__file__))
        input_path = os.path.join(
            os.path.join(this_dir, 'Inputs'), 'test3_run0.json')
        with open(input_path, 'r') as f:
            return json_module.load(f)

    def test_basic(self):
        expect_lines = [
            'BM_One',
            'BM_Two',
            'short',  # These two are not sorted
            'medium',  # These two are not sorted
        ]
        output_lines = get_unique_benchmark_names(self.load_results())
        print("\n")
        print("\n".join(output_lines))
        self.assertEqual(len(output_lines), len(expect_lines))
        # Lengths were just checked equal, so zip() covers every element.
        for expected, actual in zip(expect_lines, output_lines):
            self.assertEqual(expected, actual)
class TestReportDifference(unittest.TestCase):
    """
    Exercises get_difference_report() and print_difference_report() on the
    'test1_run1.json' / 'test1_run2.json' pair from the 'Inputs' directory
    located next to this file, with the U test left disabled.
    """

    @classmethod
    def setUpClass(cls):
        # The diff report is built once and shared by every test in the
        # class through cls.json_diff_report.
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test1_run1.json')
            testOutput2 = os.path.join(testInputs, 'test1_run2.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        # Each expected row lists the whitespace-separated tokens of one
        # output line: name, time change, cpu change, old and new real time,
        # old and new cpu time.
        expect_lines = [
            ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'],
            ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'],
            ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'],
            ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'],
            ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'],
            ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'],
            ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'],
            ['BM_100xSlower', '+99.0000', '+99.0000',
                '100', '10000', '100', '10000'],
            ['BM_100xFaster', '-0.9900', '-0.9900',
                '10000', '100', '10000', '100'],
            ['BM_10PercentCPUToTime', '+0.1000',
                '-0.1000', '100', '110', '100', '90'],
            ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
            ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        # Drop the header line and the dashed separator.
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_output(self):
        # Expected structured report, one entry per benchmark in input order.
        expected_output = [
            {
                'name': 'BM_SameTimes',
                'measurements': [{'time': 0.0000, 'cpu': 0.0000, 'real_time': 10, 'real_time_other': 10, 'cpu_time': 10, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xFaster',
                'measurements': [{'time': -0.5000, 'cpu': -0.5000, 'real_time': 50, 'real_time_other': 25, 'cpu_time': 50, 'cpu_time_other': 25}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xSlower',
                'measurements': [{'time': 1.0000, 'cpu': 1.0000, 'real_time': 50, 'real_time_other': 100, 'cpu_time': 50, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentFaster',
                'measurements': [{'time': -0.0100, 'cpu': -0.0100, 'real_time': 100, 'real_time_other': 98.9999999, 'cpu_time': 100, 'cpu_time_other': 98.9999999}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentSlower',
                'measurements': [{'time': 0.0100, 'cpu': 0.0100, 'real_time': 100, 'real_time_other': 101, 'cpu_time': 100, 'cpu_time_other': 101}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentFaster',
                'measurements': [{'time': -0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 90, 'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentSlower',
                'measurements': [{'time': 0.1000, 'cpu': 0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 110}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xSlower',
                'measurements': [{'time': 99.0000, 'cpu': 99.0000, 'real_time': 100, 'real_time_other': 10000, 'cpu_time': 100, 'cpu_time_other': 10000}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xFaster',
                'measurements': [{'time': -0.9900, 'cpu': -0.9900, 'real_time': 10000, 'real_time_other': 100, 'cpu_time': 10000, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentCPUToTime',
                'measurements': [{'time': 0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_ThirdFaster',
                'measurements': [{'time': -0.3333, 'cpu': -0.3334, 'real_time': 100, 'real_time_other': 67, 'cpu_time': 100, 'cpu_time_other': 67}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_NotBadTimeUnit',
                'measurements': [{'time': -0.9000, 'cpu': 0.2000, 'real_time': 0.4, 'real_time_other': 0.04, 'cpu_time': 0.5, 'cpu_time_other': 0.6}],
                'time_unit': 's',
                'utest': {}
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            # assert_utest and assert_measurements are defined outside this
            # class.
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)
class TestReportDifferenceBetweenFamilies(unittest.TestCase):
    """
    Compares two benchmark families taken from the single input file
    'Inputs/test2_run.json' against each other.
    """

    @classmethod
    def setUpClass(cls):
        def load_result():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput = os.path.join(testInputs, 'test2_run.json')
            with open(testOutput, 'r') as f:
                json = json.load(f)
            return json

        json = load_result()
        # Both family patterns are replaced by "." in the benchmark names, so
        # entries of the two families end up with names that can match.
        json1 = filter_benchmark(json, "BM_Z.ro", ".")
        json2 = filter_benchmark(json, "BM_O.e", ".")
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        # Each expected row lists the whitespace-separated tokens of one
        # output line: name, time change, cpu change, old and new real time,
        # old and new cpu time.
        expect_lines = [
            ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'],
            ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
            ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
            ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        # Drop the header line and the dashed separator.
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        # Expected structured report, one entry per renamed benchmark.
        expected_output = [
            {
                'name': u'.',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 10, 'real_time_other': 5, 'cpu_time': 10, 'cpu_time_other': 5}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'./4',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 40, 'real_time_other': 20, 'cpu_time': 40, 'cpu_time_other': 20}],
                'time_unit': 'ns',
                'utest': {},
            },
            {
                'name': u'Prefix/.',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 20, 'real_time_other': 10, 'cpu_time': 20, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'Prefix/./3',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 30, 'real_time_other': 15, 'cpu_time': 30, 'cpu_time_other': 15}],
                'time_unit': 'ns',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            # assert_utest and assert_measurements are defined outside this
            # class.
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)
class TestReportDifferenceWithUTest(unittest.TestCase):
    """
    Exercises the report functions with the U test enabled, using the
    'test3_run0.json' / 'test3_run1.json' pair from the 'Inputs' directory
    located next to this file.
    """

    @classmethod
    def setUpClass(cls):
        # The diff report is built once, with utest=True, and shared by every
        # test in the class through cls.json_diff_report.
        def load_results():
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test3_run0.json')
            testOutput2 = os.path.join(testInputs, 'test3_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        # Rows whose name ends in '_pvalue' are U test lines: name, the two
        # p-values, then the words of the description text.
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '0.6985',
             '0.6985',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.1489',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False)
        # Drop the header line and the dashed separator.
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_pretty_printing_aggregates_only(self):
        # Same report printed with include_aggregates_only=True; the expected
        # rows no longer include the BM_Two and medium measurement lines.
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two_pvalue',
             '0.6985',
             '0.6985',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.1489',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, include_aggregates_only=True, utest=True, utest_alpha=0.05, use_color=False)
        # Drop the header line and the dashed separator.
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        # Expected structured report; 'utest' is empty for the benchmarks
        # that are expected to have no U test results.
        expected_output = [
            {
                'name': u'BM_One',
                'measurements': [
                    {'time': -0.1,
                     'cpu': 0.1,
                     'real_time': 10,
                     'real_time_other': 9,
                     'cpu_time': 100,
                     'cpu_time_other': 110}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'BM_Two',
                'measurements': [
                    {'time': 0.1111111111111111,
                     'cpu': -0.011111111111111112,
                     'real_time': 9,
                     'real_time_other': 10,
                     'cpu_time': 90,
                     'cpu_time_other': 89},
                    {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8,
                        'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.6985353583033387, 'time_pvalue': 0.6985353583033387
                }
            },
            {
                'name': u'short',
                'measurements': [
                    {'time': -0.125,
                     'cpu': -0.0625,
                     'real_time': 8,
                     'real_time_other': 7,
                     'cpu_time': 80,
                     'cpu_time_other': 75},
                    {'time': -0.4325,
                     'cpu': -0.13506493506493514,
                     'real_time': 8,
                     'real_time_other': 4.54,
                     'cpu_time': 77,
                     'cpu_time_other': 66.6}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.14891467317876572, 'time_pvalue': 0.7670968684102772
                }
            },
            {
                'name': u'medium',
                'measurements': [
                    {'time': -0.375,
                     'cpu': -0.3375,
                     'real_time': 8,
                     'real_time_other': 5,
                     'cpu_time': 80,
                     'cpu_time_other': 53}
                ],
                'time_unit': 'ns',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            # assert_utest and assert_measurements are defined outside this
            # class.
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)
class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
        unittest.TestCase):
    """
    Checks the difference report, computed with the U-test enabled, for the
    'Inputs/test3_run0.json' and 'Inputs/test3_run1.json' fixtures: once as
    pretty-printed text and once as the raw report structure.
    """

    @classmethod
    def setUpClass(cls):
        """Load both fixture runs and build the shared difference report."""
        import json

        inputs_dir = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), 'Inputs')

        def read_fixture(file_name):
            with open(os.path.join(inputs_dir, file_name), 'r') as handle:
                return json.load(handle)

        first_run = read_fixture('test3_run0.json')
        second_run = read_fixture('test3_run1.json')
        cls.json_diff_report = get_difference_report(
            first_run, second_run, utest=True)

    def test_json_diff_report_pretty_printing(self):
        # Each expected row is the printed line split on spaces.  The two
        # p-value rows end with the same warning text, so only their name,
        # p-values and repetition counts are spelled out individually.
        warning_suffix = [
            'WARNING:', 'Results', 'unreliable!',
            '9+', 'repetitions', 'recommended.']

        def pvalue_row(name, time_pvalue, cpu_pvalue, repetitions):
            return ([name, time_pvalue, cpu_pvalue,
                     'U', 'Test,', 'Repetitions:']
                    + repetitions + warning_suffix)

        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            pvalue_row('BM_Two_pvalue', '0.6985', '0.6985',
                       ['2', 'vs', '2.']),
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            pvalue_row('short_pvalue', '0.7671', '0.1489',
                       ['2', 'vs', '3.']),
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
        ]

        output_lines_with_header = print_difference_report(
            self.json_diff_report,
            utest=True, utest_alpha=0.05, use_color=False)
        # The first two returned lines are not part of the compared rows.
        output_lines = output_lines_with_header[2:]

        print("\n")
        print("\n".join(output_lines_with_header))

        self.assertEqual(len(output_lines), len(expect_lines))
        for expected_parts, printed_line in zip(expect_lines, output_lines):
            parts = [token for token in printed_line.split(' ') if token]
            self.assertEqual(expected_parts, parts)

    def test_json_diff_report(self):
        # Per report entry the name, the time unit, the U-test result and
        # the measurements are compared against these expectations.
        expected_output = [
            {
                'name': u'BM_One',
                'time_unit': 'ns',
                'utest': {},
                'measurements': [
                    dict(time=-0.1, cpu=0.1,
                         real_time=10, real_time_other=9,
                         cpu_time=100, cpu_time_other=110),
                ],
            },
            {
                'name': u'BM_Two',
                'time_unit': 'ns',
                'utest': dict(have_optimal_repetitions=False,
                              cpu_pvalue=0.6985353583033387,
                              time_pvalue=0.6985353583033387),
                'measurements': [
                    dict(time=0.1111111111111111,
                         cpu=-0.011111111111111112,
                         real_time=9, real_time_other=10,
                         cpu_time=90, cpu_time_other=89),
                    dict(time=-0.125, cpu=-0.16279069767441862,
                         real_time=8, real_time_other=7,
                         cpu_time=86, cpu_time_other=72),
                ],
            },
            {
                'name': u'short',
                'time_unit': 'ns',
                'utest': dict(have_optimal_repetitions=False,
                              cpu_pvalue=0.14891467317876572,
                              time_pvalue=0.7670968684102772),
                'measurements': [
                    dict(time=-0.125, cpu=-0.0625,
                         real_time=8, real_time_other=7,
                         cpu_time=80, cpu_time_other=75),
                    dict(time=-0.4325, cpu=-0.13506493506493514,
                         real_time=8, real_time_other=4.54,
                         cpu_time=77, cpu_time_other=66.6),
                ],
            },
            {
                'name': u'medium',
                'time_unit': u'ns',
                'aggregate_name': '',
                'utest': {},
                'measurements': [
                    dict(time=-0.375, cpu=-0.3375,
                         real_time=8, real_time_other=5,
                         cpu_time=80, cpu_time_other=53),
                ],
            },
        ]

        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for actual, wanted in zip(self.json_diff_report, expected_output):
            self.assertEqual(actual['name'], wanted['name'])
            self.assertEqual(actual['time_unit'], wanted['time_unit'])
            assert_utest(self, actual, wanted)
            assert_measurements(self, actual, wanted)
class TestReportSorting(unittest.TestCase):
    """
    Checks that util.sort_benchmark_results() returns the benchmarks of the
    'Inputs/test4_run.json' fixture in one fixed order, no matter how the
    input list was shuffled beforehand.
    """

    @classmethod
    def setUpClass(cls):
        """Load the fixture once and keep it on the class as 'json'."""
        import json

        fixture_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'Inputs',
            'test4_run.json')
        with open(fixture_path, 'r') as handle:
            cls.json = json.load(handle)

    def test_json_diff_report_pretty_printing(self):
        import util

        # Expected order: grouped by family, then by instance, with the
        # repetitions ahead of the aggregate.  The leading numbers run in
        # descending order.
        expected_names = [
            "99 family 0 instance 0 repetition 0",
            "98 family 0 instance 0 repetition 1",
            "97 family 0 instance 0 aggregate",
            "96 family 0 instance 1 repetition 0",
            "95 family 0 instance 1 repetition 1",
            "94 family 0 instance 1 aggregate",
            "93 family 1 instance 0 repetition 0",
            "92 family 1 instance 0 repetition 1",
            "91 family 1 instance 0 aggregate",
            "90 family 1 instance 1 repetition 0",
            "89 family 1 instance 1 repetition 1",
            "88 family 1 instance 1 aggregate",
        ]

        # Re-shuffle the shared benchmark list in place and sort it again,
        # once per round.
        shuffle_rounds = len(self.json['benchmarks']) ** 2
        for _ in range(shuffle_rounds):
            random.shuffle(self.json['benchmarks'])
            sorted_result = util.sort_benchmark_results(self.json)
            sorted_benchmarks = sorted_result['benchmarks']
            self.assertEqual(len(expected_names), len(sorted_benchmarks))
            for benchmark, wanted_name in zip(
                    sorted_benchmarks, expected_names):
                self.assertEqual(benchmark['name'], wanted_name)
def assert_utest(unittest_instance, lhs, rhs):
    """
    Assert that the 'utest' entries of two report items agree.

    'unittest_instance' supplies the assertEqual / assertAlmostEqual methods.
    When lhs['utest'] is empty, rhs['utest'] has to be equal to it.
    Otherwise both p-values are compared approximately and the
    'have_optimal_repetitions' flag exactly.
    """
    if not lhs['utest']:
        # Nothing was computed on the left: the right side must match that.
        unittest_instance.assertEqual(lhs['utest'], rhs['utest'])
        return

    # P-values are computed floats, hence the almost-equal comparison.
    for pvalue_key in ('cpu_pvalue', 'time_pvalue'):
        unittest_instance.assertAlmostEqual(
            lhs['utest'][pvalue_key], rhs['utest'][pvalue_key])

    unittest_instance.assertEqual(
        lhs['utest']['have_optimal_repetitions'],
        rhs['utest']['have_optimal_repetitions'])
def assert_measurements(unittest_instance, lhs, rhs):
    """
    Assert that the 'measurements' lists of two report items agree.

    'unittest_instance' supplies the assertEqual / assertAlmostEqual methods.
    Both lists must have the same length. Measurements are then compared
    pairwise: 'real_time' and 'cpu_time' exactly, 'time' and 'cpu' to four
    decimal places.
    """
    # zip() stops at the shorter list, so without this check a missing or
    # extra measurement on either side would go unnoticed.
    unittest_instance.assertEqual(
        len(lhs['measurements']), len(rhs['measurements']))
    for m1, m2 in zip(lhs['measurements'], rhs['measurements']):
        unittest_instance.assertEqual(m1['real_time'], m2['real_time'])
        unittest_instance.assertEqual(m1['cpu_time'], m2['cpu_time'])
        # m1['time'] and m1['cpu'] hold values which are being calculated,
        # and therefore we must use almost-equal pattern.
        unittest_instance.assertAlmostEqual(m1['time'], m2['time'], places=4)
        unittest_instance.assertAlmostEqual(m1['cpu'], m2['cpu'], places=4)
# Hand control to unittest's command-line entry point when this file is
# executed directly as a script.
if __name__ == '__main__':
    unittest.main()
989# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
990# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
991# kate: indent-mode python; remove-trailing-spaces modified;