"""report.py - Utilities for reporting statistics about benchmark results

The module compares two benchmark result sets (dictionaries with a
'benchmarks' list, as loaded from benchmark JSON output), builds a
machine-readable difference report and renders it as text lines.
"""

import unittest
import os
import re
import copy
import random

from scipy.stats import mannwhitneyu

# Public API. The unit-test classes and helpers at the bottom of this file
# are deliberately left out so that 'from report import *' does not leak them.
__all__ = [
    'BenchmarkColor',
    'BC_NONE', 'BC_MAGENTA', 'BC_CYAN', 'BC_OKBLUE', 'BC_OKGREEN',
    'BC_HEADER', 'BC_WARNING', 'BC_WHITE', 'BC_FAIL', 'BC_ENDC',
    'BC_BOLD', 'BC_UNDERLINE',
    'UTEST_MIN_REPETITIONS', 'UTEST_OPTIMAL_REPETITIONS', 'UTEST_COL_NAME',
    'color_format', 'find_longest_name', 'calculate_change',
    'filter_benchmark', 'get_unique_benchmark_names', 'intersect',
    'is_potentially_comparable_benchmark', 'partition_benchmarks',
    'extract_field', 'calc_utest', 'print_utest',
    'get_difference_report', 'print_difference_report',
]


class BenchmarkColor(object):
    """A named terminal color; formatting it yields its escape code."""

    def __init__(self, name, code):
        self.name = name
        self.code = code

    def __repr__(self):
        return '%s%r' % (self.__class__.__name__,
                         (self.name, self.code))

    def __format__(self, format):
        # The format spec is ignored on purpose: '{}'.format(color) must
        # always expand to the raw escape sequence.
        return self.code


# Benchmark Colors Enumeration
BC_NONE = BenchmarkColor('NONE', '')
BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m')
BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')

UTEST_MIN_REPETITIONS = 2
UTEST_OPTIMAL_REPETITIONS = 9  # Lowest reasonable number, More is better.
UTEST_COL_NAME = "_pvalue"


def color_format(use_color, fmt_str, *args, **kwargs):
    """
    Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
    'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
    is False then all color codes in 'args' and 'kwargs' are replaced with
    the empty string.
    """
    assert use_color is True or use_color is False
    if not use_color:
        args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE
                for arg in args]
        kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
                  for key, arg in kwargs.items()}
    return fmt_str.format(*args, **kwargs)


def find_longest_name(benchmark_list):
    """
    Return the length of the longest benchmark name in a given list of
    benchmark JSON objects. The result is never smaller than 1.
    """
    longest_name = 1
    for bc in benchmark_list:
        longest_name = max(longest_name, len(bc['name']))
    return longest_name


def calculate_change(old_val, new_val):
    """
    Return a float representing the decimal change between old_val and new_val.
    """
    if old_val == 0 and new_val == 0:
        return 0.0
    if old_val == 0:
        # A relative change against zero is undefined; fall back to the
        # change relative to the mean of both values.
        return float(new_val - old_val) / (float(old_val + new_val) / 2)
    return float(new_val - old_val) / abs(old_val)


def filter_benchmark(json_orig, family, replacement=""):
    """
    Apply a filter to the json, and only leave the 'family' of benchmarks.

    'family' is a regular expression; every match of it inside a kept
    benchmark name is substituted with 'replacement'. The input is left
    untouched, the returned dictionary holds deep copies.
    """
    regex = re.compile(family)
    filtered = {'benchmarks': []}
    for be in json_orig['benchmarks']:
        if not regex.search(be['name']):
            continue
        filteredbench = copy.deepcopy(be)  # Do NOT modify the old name!
        filteredbench['name'] = regex.sub(replacement, filteredbench['name'])
        filtered['benchmarks'].append(filteredbench)
    return filtered


def get_unique_benchmark_names(json):
    """
    While *keeping* the order, give all the unique 'names' used for benchmarks.
    """
    seen = set()
    uniqued = []
    for bench in json['benchmarks']:
        name = bench['name']
        if name not in seen:
            seen.add(name)
            uniqued.append(name)
    return uniqued


def intersect(list1, list2):
    """
    Given two lists, get a new list consisting of the elements only contained
    in *both of the input lists*, while preserving the ordering of 'list1'.
    """
    return [x for x in list1 if x in list2]


def is_potentially_comparable_benchmark(x):
    """Return True if the entry carries a time unit and both timings."""
    return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x)


def partition_benchmarks(json1, json2):
    """
    While preserving the ordering, find benchmarks with the same names in
    both of the inputs, and group them.
    (i.e. partition/filter into groups with common name)

    Returns a list of [lhs, rhs] pairs, where 'lhs' holds the matching
    entries of 'json1' and 'rhs' those of 'json2'.
    """
    json1_unique_names = get_unique_benchmark_names(json1)
    json2_unique_names = get_unique_benchmark_names(json2)
    names = intersect(json1_unique_names, json2_unique_names)
    partitions = []
    for name in names:
        time_unit = None
        # Pick the time unit from the first entry of the lhs benchmark.
        # We should be careful not to crash with unexpected input.
        for x in json1['benchmarks']:
            if (x['name'] == name and is_potentially_comparable_benchmark(x)):
                time_unit = x['time_unit']
                break
        if time_unit is None:
            continue
        # Filter by name and time unit.
        # All the repetitions are assumed to be comparable.
        # Entries without a 'time_unit' key (unexpected input) are skipped
        # instead of raising KeyError.
        lhs = [x for x in json1['benchmarks'] if x['name'] == name and
               x.get('time_unit') == time_unit]
        rhs = [x for x in json2['benchmarks'] if x['name'] == name and
               x.get('time_unit') == time_unit]
        partitions.append([lhs, rhs])
    return partitions


def extract_field(partition, field_name):
    """Return [lhs_values, rhs_values] of 'field_name' for a partition."""
    # The count of elements may be different. We want *all* of them.
    lhs = [x[field_name] for x in partition[0]]
    rhs = [x[field_name] for x in partition[1]]
    return [lhs, rhs]


def calc_utest(timings_cpu, timings_time):
    """
    Run the two-sided Mann-Whitney U test on the cpu and real-time samples.

    Both arguments are [lhs, rhs] pairs as produced by extract_field().
    Returns a tuple (have_optimal_repetitions, cpu_pvalue, time_pvalue);
    when any sample has fewer than UTEST_MIN_REPETITIONS values the test
    is not run and (False, None, None) is returned.
    """
    min_rep_cnt = min(len(timings_time[0]),
                      len(timings_time[1]),
                      len(timings_cpu[0]),
                      len(timings_cpu[1]))

    # Does *everything* have at least UTEST_MIN_REPETITIONS repetitions?
    if min_rep_cnt < UTEST_MIN_REPETITIONS:
        return False, None, None

    time_pvalue = mannwhitneyu(
        timings_time[0], timings_time[1], alternative='two-sided').pvalue
    cpu_pvalue = mannwhitneyu(
        timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue

    return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue


def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True):
    """
    Render the U test results of one benchmark.

    'utest' is the 'utest' dictionary of a difference-report entry.
    Returns a list with a single formatted line, or an empty list when the
    U test could not be computed at all.
    """
    def get_utest_color(pval):
        return BC_FAIL if pval >= utest_alpha else BC_OKGREEN

    # Check if we failed miserably with minimum required repetitions for utest
    if (not utest['have_optimal_repetitions'] and
            utest['cpu_pvalue'] is None and
            utest['time_pvalue'] is None):
        return []

    dsc = "U Test, Repetitions: {} vs {}".format(
        utest['nr_of_repetitions'], utest['nr_of_repetitions_other'])
    dsc_color = BC_OKGREEN

    # We still got some results to show but issue a warning about it.
    if not utest['have_optimal_repetitions']:
        dsc_color = BC_WARNING
        dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
            UTEST_OPTIMAL_REPETITIONS)

    special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}"

    return [color_format(use_color,
                         special_str,
                         BC_HEADER,
                         "{}{}".format(bc_name, UTEST_COL_NAME),
                         first_col_width,
                         get_utest_color(
                             utest['time_pvalue']), utest['time_pvalue'],
                         get_utest_color(
                             utest['cpu_pvalue']), utest['cpu_pvalue'],
                         dsc_color, dsc,
                         endc=BC_ENDC)]


def get_difference_report(
        json1,
        json2,
        utest=False):
    """
    Calculate and report the difference between each test of two benchmarks
    runs specified as 'json1' and 'json2'. Output is another json containing
    relevant details for each test run.

    Each entry of the returned list has the keys 'name', 'measurements',
    'time_unit', 'run_type', 'aggregate_name' and 'utest'. 'utest' is an
    empty dictionary unless 'utest' is True and the U test could be run.
    """
    assert utest is True or utest is False

    diff_report = []
    for partition in partition_benchmarks(json1, json2):
        lhs, rhs = partition
        first = lhs[0]
        benchmark_name = first['name']
        time_unit = first['time_unit']

        # Careful, we may have different repetition count; zip() pairs the
        # repetitions up to the shorter side.
        measurements = [{
            'real_time': bn['real_time'],
            'cpu_time': bn['cpu_time'],
            'real_time_other': other_bench['real_time'],
            'cpu_time_other': other_bench['cpu_time'],
            'time': calculate_change(bn['real_time'],
                                     other_bench['real_time']),
            'cpu': calculate_change(bn['cpu_time'],
                                    other_bench['cpu_time'])
        } for bn, other_bench in zip(lhs, rhs)]

        # After processing the whole partition, if requested, do the U test.
        utest_results = {}
        if utest:
            timings_cpu = extract_field(partition, 'cpu_time')
            timings_time = extract_field(partition, 'real_time')
            have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(
                timings_cpu, timings_time)
            # Compare against None explicitly: a p-value of exactly 0.0 is
            # a valid (and highly significant) result, not a missing one.
            if cpu_pvalue is not None and time_pvalue is not None:
                utest_results = {
                    'have_optimal_repetitions': have_optimal_repetitions,
                    'cpu_pvalue': cpu_pvalue,
                    'time_pvalue': time_pvalue,
                    'nr_of_repetitions': len(timings_cpu[0]),
                    'nr_of_repetitions_other': len(timings_cpu[1])
                }

        # Store only if we had any measurements for given benchmark.
        # E.g. partition_benchmarks will filter out the benchmarks having
        # time units which are not compatible with other time units in the
        # benchmark suite.
        if measurements:
            run_type = first.get('run_type', '')
            if run_type == 'aggregate':
                aggregate_name = first.get('aggregate_name', '')
            else:
                aggregate_name = ''
            diff_report.append({
                'name': benchmark_name,
                'measurements': measurements,
                'time_unit': time_unit,
                'run_type': run_type,
                'aggregate_name': aggregate_name,
                'utest': utest_results
            })

    return diff_report


def print_difference_report(
        json_diff_report,
        include_aggregates_only=False,
        utest=False,
        utest_alpha=0.05,
        use_color=True):
    """
    Render a difference report, as produced by get_difference_report(), into
    a list of text lines: a header, a separator and one line per measurement.

    If 'include_aggregates_only' is True, measurements of entries whose
    'run_type' is present and not 'aggregate' are omitted. If 'utest' is
    True, the U test line of every entry that has U test results is added.
    """
    assert utest is True or utest is False

    def get_color(res):
        if res > 0.05:
            return BC_FAIL
        elif res > -0.07:
            return BC_WHITE
        else:
            return BC_CYAN

    first_col_width = max(find_longest_name(json_diff_report),
                          len('Benchmark'))
    first_col_width += len(UTEST_COL_NAME)
    # The column titles are right-aligned with the same widths the data rows
    # use below (16, 16, 14, 14, 14, 14) so the header lines up with them.
    first_line = "{:<{}s}{:>4s}{:>16s}{:>14s}{:>14s}{:>14s}{:>14s}".format(
        'Benchmark', 12 + first_col_width,
        'Time', 'CPU', 'Time Old', 'Time New', 'CPU Old', 'CPU New')
    output_strs = [first_line, '-' * len(first_line)]

    fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
    for benchmark in json_diff_report:
        # *If* we were asked to only include aggregates,
        # and if it is non-aggregate, then don't print it.
        if (not include_aggregates_only or
                'run_type' not in benchmark or
                benchmark['run_type'] == 'aggregate'):
            for measurement in benchmark['measurements']:
                output_strs += [color_format(use_color,
                                             fmt_str,
                                             BC_HEADER,
                                             benchmark['name'],
                                             first_col_width,
                                             get_color(measurement['time']),
                                             measurement['time'],
                                             get_color(measurement['cpu']),
                                             measurement['cpu'],
                                             measurement['real_time'],
                                             measurement['real_time_other'],
                                             measurement['cpu_time'],
                                             measurement['cpu_time_other'],
                                             endc=BC_ENDC)]

        # After processing the measurements, if requested and
        # if applicable (e.g. u-test exists for given benchmark),
        # print the U test.
        if utest and benchmark['utest']:
            output_strs += print_utest(benchmark['name'],
                                       benchmark['utest'],
                                       utest_alpha=utest_alpha,
                                       first_col_width=first_col_width,
                                       use_color=use_color)

    return output_strs


###############################################################################
# Unit tests


def _load_test_input(filename):
    """Load a JSON fixture from the 'Inputs' directory next to this file."""
    # Imported locally: several functions of this module use 'json' as a
    # parameter name.
    import json
    inputs_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'Inputs')
    with open(os.path.join(inputs_dir, filename), 'r') as f:
        return json.load(f)


class TestGetUniqueBenchmarkNames(unittest.TestCase):
    def load_results(self):
        return _load_test_input('test3_run0.json')

    def test_basic(self):
        expect_lines = [
            'BM_One',
            'BM_Two',
            'short',  # These two are not sorted
            'medium',  # These two are not sorted
        ]
        json = self.load_results()
        output_lines = get_unique_benchmark_names(json)
        print("\n")
        print("\n".join(output_lines))
        self.assertEqual(len(output_lines), len(expect_lines))
        for expected, line in zip(expect_lines, output_lines):
            self.assertEqual(expected, line)


class TestReportDifference(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        json1 = _load_test_input('test1_run1.json')
        json2 = _load_test_input('test1_run2.json')
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'],
            ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'],
            ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'],
            ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'],
            ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'],
            ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'],
            ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'],
            ['BM_100xSlower', '+99.0000', '+99.0000',
             '100', '10000', '100', '10000'],
            ['BM_100xFaster', '-0.9900', '-0.9900',
             '10000', '100', '10000', '100'],
            ['BM_10PercentCPUToTime', '+0.1000',
             '-0.1000', '100', '110', '100', '90'],
            ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
            ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for expected, line in zip(expect_lines, output_lines):
            parts = [x for x in line.split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expected, parts)

    def test_json_diff_report_output(self):
        expected_output = [
            {
                'name': 'BM_SameTimes',
                'measurements': [{'time': 0.0000, 'cpu': 0.0000,
                                  'real_time': 10, 'real_time_other': 10,
                                  'cpu_time': 10, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xFaster',
                'measurements': [{'time': -0.5000, 'cpu': -0.5000,
                                  'real_time': 50, 'real_time_other': 25,
                                  'cpu_time': 50, 'cpu_time_other': 25}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xSlower',
                'measurements': [{'time': 1.0000, 'cpu': 1.0000,
                                  'real_time': 50, 'real_time_other': 100,
                                  'cpu_time': 50, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentFaster',
                'measurements': [{'time': -0.0100, 'cpu': -0.0100,
                                  'real_time': 100,
                                  'real_time_other': 98.9999999,
                                  'cpu_time': 100,
                                  'cpu_time_other': 98.9999999}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentSlower',
                'measurements': [{'time': 0.0100, 'cpu': 0.0100,
                                  'real_time': 100, 'real_time_other': 101,
                                  'cpu_time': 100, 'cpu_time_other': 101}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentFaster',
                'measurements': [{'time': -0.1000, 'cpu': -0.1000,
                                  'real_time': 100, 'real_time_other': 90,
                                  'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentSlower',
                'measurements': [{'time': 0.1000, 'cpu': 0.1000,
                                  'real_time': 100, 'real_time_other': 110,
                                  'cpu_time': 100, 'cpu_time_other': 110}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xSlower',
                'measurements': [{'time': 99.0000, 'cpu': 99.0000,
                                  'real_time': 100, 'real_time_other': 10000,
                                  'cpu_time': 100, 'cpu_time_other': 10000}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xFaster',
                'measurements': [{'time': -0.9900, 'cpu': -0.9900,
                                  'real_time': 10000, 'real_time_other': 100,
                                  'cpu_time': 10000, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentCPUToTime',
                'measurements': [{'time': 0.1000, 'cpu': -0.1000,
                                  'real_time': 100, 'real_time_other': 110,
                                  'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_ThirdFaster',
                'measurements': [{'time': -0.3333, 'cpu': -0.3334,
                                  'real_time': 100, 'real_time_other': 67,
                                  'cpu_time': 100, 'cpu_time_other': 67}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_NotBadTimeUnit',
                'measurements': [{'time': -0.9000, 'cpu': 0.2000,
                                  'real_time': 0.4, 'real_time_other': 0.04,
                                  'cpu_time': 0.5, 'cpu_time_other': 0.6}],
                'time_unit': 's',
                'utest': {}
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceBetweenFamilies(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        json = _load_test_input('test2_run.json')
        json1 = filter_benchmark(json, "BM_Z.ro", ".")
        json2 = filter_benchmark(json, "BM_O.e", ".")
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'],
            ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
            ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
            ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for expected, line in zip(expect_lines, output_lines):
            parts = [x for x in line.split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expected, parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': '.',
                'measurements': [{'time': -0.5, 'cpu': -0.5,
                                  'real_time': 10, 'real_time_other': 5,
                                  'cpu_time': 10, 'cpu_time_other': 5}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': './4',
                'measurements': [{'time': -0.5, 'cpu': -0.5,
                                  'real_time': 40, 'real_time_other': 20,
                                  'cpu_time': 40, 'cpu_time_other': 20}],
                'time_unit': 'ns',
                'utest': {},
            },
            {
                'name': 'Prefix/.',
                'measurements': [{'time': -0.5, 'cpu': -0.5,
                                  'real_time': 20, 'real_time_other': 10,
                                  'cpu_time': 20, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'Prefix/./3',
                'measurements': [{'time': -0.5, 'cpu': -0.5,
                                  'real_time': 30, 'real_time_other': 15,
                                  'cpu_time': 30, 'cpu_time_other': 15}],
                'time_unit': 'ns',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        json1 = _load_test_input('test3_run0.json')
        json2 = _load_test_input('test3_run1.json')
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue', '0.6985', '0.6985',
             'U', 'Test,', 'Repetitions:', '2', 'vs', '2.',
             'WARNING:', 'Results', 'unreliable!',
             '9+', 'repetitions', 'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue', '0.7671', '0.1489',
             'U', 'Test,', 'Repetitions:', '2', 'vs', '3.',
             'WARNING:', 'Results', 'unreliable!',
             '9+', 'repetitions', 'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05,
            use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for expected, line in zip(expect_lines, output_lines):
            parts = [x for x in line.split(' ') if x]
            self.assertEqual(expected, parts)

    def test_json_diff_report_pretty_printing_aggregates_only(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two_pvalue', '0.6985', '0.6985',
             'U', 'Test,', 'Repetitions:', '2', 'vs', '2.',
             'WARNING:', 'Results', 'unreliable!',
             '9+', 'repetitions', 'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue', '0.7671', '0.1489',
             'U', 'Test,', 'Repetitions:', '2', 'vs', '3.',
             'WARNING:', 'Results', 'unreliable!',
             '9+', 'repetitions', 'recommended.'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, include_aggregates_only=True,
            utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for expected, line in zip(expect_lines, output_lines):
            parts = [x for x in line.split(' ') if x]
            self.assertEqual(expected, parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': 'BM_One',
                'measurements': [
                    {'time': -0.1,
                     'cpu': 0.1,
                     'real_time': 10,
                     'real_time_other': 9,
                     'cpu_time': 100,
                     'cpu_time_other': 110}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_Two',
                'measurements': [
                    {'time': 0.1111111111111111,
                     'cpu': -0.011111111111111112,
                     'real_time': 9,
                     'real_time_other': 10,
                     'cpu_time': 90,
                     'cpu_time_other': 89},
                    {'time': -0.125, 'cpu': -0.16279069767441862,
                     'real_time': 8, 'real_time_other': 7,
                     'cpu_time': 86, 'cpu_time_other': 72}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False,
                    'cpu_pvalue': 0.6985353583033387,
                    'time_pvalue': 0.6985353583033387
                }
            },
            {
                'name': 'short',
                'measurements': [
                    {'time': -0.125,
                     'cpu': -0.0625,
                     'real_time': 8,
                     'real_time_other': 7,
                     'cpu_time': 80,
                     'cpu_time_other': 75},
                    {'time': -0.4325,
                     'cpu': -0.13506493506493514,
                     'real_time': 8,
                     'real_time_other': 4.54,
                     'cpu_time': 77,
                     'cpu_time_other': 66.6}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False,
                    'cpu_pvalue': 0.14891467317876572,
                    'time_pvalue': 0.7670968684102772
                }
            },
            {
                'name': 'medium',
                'measurements': [
                    {'time': -0.375,
                     'cpu': -0.3375,
                     'real_time': 8,
                     'real_time_other': 5,
                     'cpu_time': 80,
                     'cpu_time_other': 53}
                ],
                'time_unit': 'ns',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
        unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        json1 = _load_test_input('test3_run0.json')
        json2 = _load_test_input('test3_run1.json')
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue', '0.6985', '0.6985',
             'U', 'Test,', 'Repetitions:', '2', 'vs', '2.',
             'WARNING:', 'Results', 'unreliable!',
             '9+', 'repetitions', 'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue', '0.7671', '0.1489',
             'U', 'Test,', 'Repetitions:', '2', 'vs', '3.',
             'WARNING:', 'Results', 'unreliable!',
             '9+', 'repetitions', 'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53']
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report,
            utest=True, utest_alpha=0.05, use_color=False)
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for expected, line in zip(expect_lines, output_lines):
            parts = [x for x in line.split(' ') if x]
            self.assertEqual(expected, parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': 'BM_One',
                'measurements': [
                    {'time': -0.1,
                     'cpu': 0.1,
                     'real_time': 10,
                     'real_time_other': 9,
                     'cpu_time': 100,
                     'cpu_time_other': 110}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_Two',
                'measurements': [
                    {'time': 0.1111111111111111,
                     'cpu': -0.011111111111111112,
                     'real_time': 9,
                     'real_time_other': 10,
                     'cpu_time': 90,
                     'cpu_time_other': 89},
                    {'time': -0.125, 'cpu': -0.16279069767441862,
                     'real_time': 8, 'real_time_other': 7,
                     'cpu_time': 86, 'cpu_time_other': 72}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False,
                    'cpu_pvalue': 0.6985353583033387,
                    'time_pvalue': 0.6985353583033387
                }
            },
            {
                'name': 'short',
                'measurements': [
                    {'time': -0.125,
                     'cpu': -0.0625,
                     'real_time': 8,
                     'real_time_other': 7,
                     'cpu_time': 80,
                     'cpu_time_other': 75},
                    {'time': -0.4325,
                     'cpu': -0.13506493506493514,
                     'real_time': 8,
                     'real_time_other': 4.54,
                     'cpu_time': 77,
                     'cpu_time_other': 66.6}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False,
                    'cpu_pvalue': 0.14891467317876572,
                    'time_pvalue': 0.7670968684102772
                }
            },
            {
                'name': 'medium',
                'measurements': [
                    {'real_time_other': 5,
                     'cpu_time': 80,
                     'time': -0.375,
                     'real_time': 8,
                     'cpu_time_other': 53,
                     'cpu': -0.3375
                     }
                ],
                'utest': {},
                'time_unit': 'ns',
                'aggregate_name': ''
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportSorting(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.json = _load_test_input('test4_run.json')

    def test_json_diff_report_pretty_printing(self):
        import util

        expected_names = [
            "99 family 0 instance 0 repetition 0",
            "98 family 0 instance 0 repetition 1",
            "97 family 0 instance 0 aggregate",
            "96 family 0 instance 1 repetition 0",
            "95 family 0 instance 1 repetition 1",
            "94 family 0 instance 1 aggregate",
            "93 family 1 instance 0 repetition 0",
            "92 family 1 instance 0 repetition 1",
            "91 family 1 instance 0 aggregate",
            "90 family 1 instance 1 repetition 0",
            "89 family 1 instance 1 repetition 1",
            "88 family 1 instance 1 aggregate"
        ]

        # Shuffle repeatedly: sorting must restore the expected order no
        # matter what the input order was.
        for _ in range(len(self.json['benchmarks']) ** 2):
            random.shuffle(self.json['benchmarks'])
            sorted_benchmarks = util.sort_benchmark_results(self.json)[
                'benchmarks']
            self.assertEqual(len(expected_names), len(sorted_benchmarks))
            for out, expected in zip(sorted_benchmarks, expected_names):
                self.assertEqual(out['name'], expected)


def assert_utest(unittest_instance, lhs, rhs):
    """Assert that the 'utest' results of two report entries agree."""
    if lhs['utest']:
        unittest_instance.assertAlmostEqual(
            lhs['utest']['cpu_pvalue'],
            rhs['utest']['cpu_pvalue'])
        unittest_instance.assertAlmostEqual(
            lhs['utest']['time_pvalue'],
            rhs['utest']['time_pvalue'])
        unittest_instance.assertEqual(
            lhs['utest']['have_optimal_repetitions'],
            rhs['utest']['have_optimal_repetitions'])
    else:
        # lhs is empty. assert if rhs is not.
        unittest_instance.assertEqual(lhs['utest'], rhs['utest'])


def assert_measurements(unittest_instance, lhs, rhs):
    """Assert that the measurements of two report entries agree pairwise."""
    for m1, m2 in zip(lhs['measurements'], rhs['measurements']):
        unittest_instance.assertEqual(m1['real_time'], m2['real_time'])
        unittest_instance.assertEqual(m1['cpu_time'], m2['cpu_time'])
        # m1['time'] and m1['cpu'] hold values which are being calculated,
        # and therefore we must use almost-equal pattern.
        unittest_instance.assertAlmostEqual(m1['time'], m2['time'], places=4)
        unittest_instance.assertAlmostEqual(m1['cpu'], m2['cpu'], places=4)


if __name__ == '__main__':
    unittest.main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;