#!/usr/bin/env python3
#
# Parallel VM test case executor
# Copyright (c) 2014-2019, Jouni Malinen <j@w1.fi>
#
# This software may be distributed under the terms of the BSD license.
# See README for more details.

from __future__ import print_function
import argparse
import curses
import errno
import fcntl
import logging
import multiprocessing
import os
import selectors
import subprocess
import sys
import time

logger = logging.getLogger()

# Test cases that take significantly longer time to execute than average.
long_tests = ["ap_roam_open",
              "wpas_mesh_password_mismatch_retry",
              "wpas_mesh_password_mismatch",
              "hostapd_oom_wpa2_psk_connect",
              "ap_hs20_fetch_osu_stop",
              "ap_roam_wpa2_psk",
              "ibss_wpa_none_ccmp",
              "nfc_wps_er_handover_pk_hash_mismatch_sta",
              "go_neg_peers_force_diff_freq",
              "p2p_cli_invite",
              "sta_ap_scan_2b",
              "ap_pmf_sta_unprot_deauth_burst",
              "ap_bss_add_remove_during_ht_scan",
              "wext_scan_hidden",
              "autoscan_exponential",
              "nfc_p2p_client",
              "wnm_bss_keep_alive",
              "ap_inactivity_disconnect",
              "scan_bss_expiration_age",
              "autoscan_periodic",
              "discovery_group_client",
              "concurrent_p2pcli",
              "ap_bss_add_remove",
              "wpas_ap_wps",
              "wext_pmksa_cache",
              "ibss_wpa_none",
              "ap_ht_40mhz_intolerant_ap",
              "ibss_rsn",
              "discovery_pd_retries",
              "ap_wps_setup_locked_timeout",
              "ap_vht160",
              "he160",
              "he160b",
              "dfs_radar",
              "dfs",
              "dfs_ht40_minus",
              "dfs_etsi",
              "dfs_radar_vht80_downgrade",
              "ap_acs_dfs",
              "grpform_cred_ready_timeout",
              "hostapd_oom_wpa2_eap_connect",
              "wpas_ap_dfs",
              "autogo_many",
              "hostapd_oom_wpa2_eap",
              "ibss_open",
              "proxyarp_open_ebtables",
              "proxyarp_open_ebtables_ipv6",
              "radius_failover",
              "obss_scan_40_intolerant",
              "dbus_connect_oom",
              "proxyarp_open",
              "proxyarp_open_ipv6",
              "ap_wps_iteration",
              "ap_wps_iteration_error",
              "ap_wps_pbc_timeout",
              "ap_wps_pbc_ap_timeout",
              "ap_wps_pin_ap_timeout",
              "ap_wps_http_timeout",
              "p2p_go_move_reg_change",
              "p2p_go_move_active",
              "p2p_go_move_scm",
              "p2p_go_move_scm_peer_supports",
              "p2p_go_move_scm_peer_does_not_support",
              "p2p_go_move_scm_multi"]

def get_failed(vm):
    """Return the combined list of failed test case names from all VMs."""
    failed = []
    for i in range(num_servers):
        failed += vm[i]['failed']
    return failed

def vm_read_stdout(vm, test_queue):
    """Consume and parse any available stdout output from a VM process.

    Accumulates partial lines in vm['pending'] until a newline arrives,
    then interprets the result lines (READY/PASS/FAIL/NOT-FOUND/SKIP/
    REASON/START) reported by the test runner inside the VM, updating
    the global counters and per-VM state. A test case that fails on its
    first run is requeued once when rerun_failures is enabled.

    Returns True when the VM finished a step and is ready for the next
    command; False when there is nothing actionable yet.
    """
    global total_started, total_passed, total_failed, total_skipped
    global rerun_failures
    global first_run_failures

    ready = False
    try:
        out = vm['proc'].stdout.read()
        if out is None:
            return False
        out = out.decode()
    except IOError as e:
        # stdout is non-blocking; EAGAIN just means no data available yet.
        if e.errno == errno.EAGAIN:
            return False
        raise
    logger.debug("VM[%d] stdout.read[%s]" % (vm['idx'], out.rstrip()))
    pending = vm['pending'] + out
    while True:
        pos = pending.find('\n')
        if pos < 0:
            # Keep the incomplete tail for the next read.
            break
        line = pending[0:pos].rstrip()
        pending = pending[(pos + 1):]
        logger.debug("VM[%d] stdout full line[%s]" % (vm['idx'], line))
        if line.startswith("READY"):
            vm['starting'] = False
            vm['started'] = True
            ready = True
        elif line.startswith("PASS"):
            ready = True
            total_passed += 1
        elif line.startswith("FAIL"):
            ready = True
            total_failed += 1
            vals = line.split(' ')
            if len(vals) < 2:
                logger.info("VM[%d] incomplete FAIL line: %s" % (vm['idx'],
                                                                 line))
                name = line
            else:
                name = vals[1]
            logger.debug("VM[%d] test case failed: %s" % (vm['idx'], name))
            vm['failed'].append(name)
            if name != vm['current_name']:
                logger.info("VM[%d] test result mismatch: %s (expected %s)" % (vm['idx'], name, vm['current_name']))
            else:
                count = vm['current_count']
                if count == 0:
                    first_run_failures.append(name)
                if rerun_failures and count < 1:
                    # Give the failed test case a single retry on some VM.
                    logger.debug("Requeue test case %s" % name)
                    test_queue.append((name, vm['current_count'] + 1))
        elif line.startswith("NOT-FOUND"):
            ready = True
            total_failed += 1
            logger.info("VM[%d] test case not found" % vm['idx'])
        elif line.startswith("SKIP"):
            ready = True
            total_skipped += 1
        elif line.startswith("REASON"):
            # Everything after the "REASON " prefix is the skip reason text.
            vm['skip_reason'].append(line[7:])
        elif line.startswith("START"):
            total_started += 1
            if len(vm['failed']) == 0:
                # Record the sequence of test cases run before the first
                # failure so that the exact sequence can be re-run later.
                vals = line.split(' ')
                if len(vals) >= 2:
                    vm['fail_seq'].append(vals[1])
        vm['out'] += line + '\n'
    vm['pending'] = pending
    return ready

def start_vm(vm, sel):
    """Launch the VM subprocess and register its output streams.

    stdout/stderr are switched to non-blocking mode and added to the
    selector so show_progress() can multiplex all VMs in one loop.
    """
    logger.info("VM[%d] starting up" % (vm['idx'] + 1))
    vm['starting'] = True
    vm['proc'] = subprocess.Popen(vm['cmd'],
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
    # Clear the command so this VM is never started twice.
    vm['cmd'] = None
    for stream in [vm['proc'].stdout, vm['proc'].stderr]:
        fd = stream.fileno()
        fl = fcntl.fcntl(fd, fcntl.F_GETFL)
        fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
        sel.register(stream, selectors.EVENT_READ, vm)

def num_vm_starting():
    """Return the number of VMs currently in their startup phase."""
    count = 0
    for i in range(num_servers):
        if vm[i]['starting']:
            count += 1
    return count

def vm_read_stderr(vm):
    """Drain available stderr output from a VM into vm['err'] and the log."""
    try:
        err = vm['proc'].stderr.read()
        if err is not None:
            err = err.decode()
            if len(err) > 0:
                vm['err'] += err
                logger.info("VM[%d] stderr.read[%s]" % (vm['idx'], err))
    except IOError as e:
        # Non-blocking read: EAGAIN means no stderr data at the moment.
        if e.errno != errno.EAGAIN:
            raise
    # NOTE(review): other IOError codes re-raise; EOF returns b'' and is
    # treated as "nothing to add".

def vm_next_step(_vm, scr, test_queue):
    """Send the next test case name to a VM, or an empty line to shut it down.

    Also updates the per-VM status column on the curses screen.
    """
    scr.move(_vm['idx'] + 1, 10)
    scr.clrtoeol()
    if not test_queue:
        # An empty command line tells the runner inside the VM to exit.
        _vm['proc'].stdin.write(b'\n')
        _vm['proc'].stdin.flush()
        scr.addstr("shutting down")
        logger.info("VM[%d] shutting down" % _vm['idx'])
        return
    (name, count) = test_queue.pop(0)
    _vm['current_name'] = name
    _vm['current_count'] = count
    _vm['proc'].stdin.write(name.encode() + b'\n')
    _vm['proc'].stdin.flush()
    scr.addstr(name)
    logger.debug("VM[%d] start test %s" % (_vm['idx'], name))

def check_vm_start(scr, sel, test_queue):
    """Start additional VMs on demand and report overall run state.

    At most half of the CPU cores (on machines with more than four) may
    be starting up simultaneously, and a VM is only started while there
    are more queued tests than VMs already starting.

    Returns (running, updated): running is True while at least one VM
    process is alive; updated is True when a new VM was just started.
    """
    running = False
    for i in range(num_servers):
        if vm[i]['proc']:
            running = True
            continue

        # Either not yet started or already stopped VM
        max_start = multiprocessing.cpu_count()
        if max_start > 4:
            # Floor division: keep the startup limit an integer count.
            max_start //= 2
        num_starting = num_vm_starting()
        if vm[i]['cmd'] and len(test_queue) > num_starting and \
           num_starting < max_start:
            scr.move(i + 1, 10)
            scr.clrtoeol()
            scr.addstr(i + 1, 10, "starting VM")
            start_vm(vm[i], sel)
            return True, True

    return running, False

def vm_terminated(_vm, scr, sel, test_queue):
    """Handle a VM process that has exited.

    Unregisters the VM's streams from the selector, checks its console
    log for a kernel panic, and flags an unexpected exit when there is
    still more queued work than live VMs.

    Returns True when the status screen should be refreshed.
    """
    updated = False
    for stream in [_vm['proc'].stdout, _vm['proc'].stderr]:
        sel.unregister(stream)
    _vm['proc'] = None
    scr.move(_vm['idx'] + 1, 10)
    scr.clrtoeol()
    log = '{}/{}.srv.{}/console'.format(dir, timestamp, _vm['idx'] + 1)
    with open(log, 'r') as f:
        if "Kernel panic" in f.read():
            scr.addstr("kernel panic")
            logger.info("VM[%d] kernel panic" % _vm['idx'])
            updated = True
    if test_queue:
        num_vm = 0
        for i in range(num_servers):
            # Fix: count the other still-running VMs (vm[i]), not the one
            # that just terminated (_vm['proc'] was set to None above, so
            # the old check always counted zero live VMs).
            if vm[i]['proc']:
                num_vm += 1
        if len(test_queue) > num_vm:
            scr.addstr("unexpected exit")
            # Fix: log the terminated VM's index instead of the loop
            # variable left over from the counting loop above.
            logger.info("VM[%d] unexpected exit" % _vm['idx'])
            updated = True
    return updated

def update_screen(scr, total_tests):
    """Redraw the progress percentage, totals, and failed-case summary."""
    scr.move(num_servers + 1, 10)
    scr.clrtoeol()
    scr.addstr("{} %".format(int(100.0 * (total_passed + total_failed + total_skipped) / total_tests)))
    scr.addstr(num_servers + 1, 20,
               "TOTAL={} STARTED={} PASS={} FAIL={} SKIP={}".format(total_tests, total_started, total_passed, total_failed, total_skipped))
    failed = get_failed(vm)
    if len(failed) > 0:
        scr.move(num_servers + 2, 0)
        scr.clrtoeol()
        scr.addstr("Failed test cases: ")
        count = 0
        for f in failed:
            count += 1
            if count > 30:
                # Avoid overflowing the screen with a long failure list.
                scr.addstr('...')
                scr.clrtoeol()
                break
            scr.addstr(f)
            scr.addstr(' ')
    scr.refresh()

def show_progress(scr):
    """Main event loop: run all test cases across the VMs.

    Starts the first VM immediately, then multiplexes VM stdout/stderr
    through a selector, feeding each VM its next test case as soon as it
    reports a result, until the queue is empty and every VM has exited.
    Additional VMs are started lazily by check_vm_start().
    """
    global num_servers
    global vm
    global dir
    global timestamp
    global tests
    global first_run_failures
    global total_started, total_passed, total_failed, total_skipped
    global rerun_failures

    sel = selectors.DefaultSelector()
    total_tests = len(tests)
    logger.info("Total tests: %d" % total_tests)
    # Each queue entry is (test name, retry count).
    test_queue = [(t, 0) for t in tests]
    start_vm(vm[0], sel)

    scr.leaveok(1)
    scr.addstr(0, 0, "Parallel test execution status", curses.A_BOLD)
    for i in range(0, num_servers):
        scr.addstr(i + 1, 0, "VM %d:" % (i + 1), curses.A_BOLD)
        status = "starting VM" if vm[i]['proc'] else "not yet started"
        scr.addstr(i + 1, 10, status)
    scr.addstr(num_servers + 1, 0, "Total:", curses.A_BOLD)
    scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED=0 PASS=0 FAIL=0 SKIP=0".format(total_tests))
    scr.refresh()

    while True:
        updated = False
        events = sel.select(timeout=1)
        for key, mask in events:
            _vm = key.data
            if not _vm['proc']:
                # The VM terminated earlier in this batch of events.
                continue
            vm_read_stderr(_vm)
            if vm_read_stdout(_vm, test_queue):
                vm_next_step(_vm, scr, test_queue)
                updated = True
            vm_read_stderr(_vm)
            if _vm['proc'].poll() is not None:
                if vm_terminated(_vm, scr, sel, test_queue):
                    updated = True

        running, run_update = check_vm_start(scr, sel, test_queue)
        if updated or run_update:
            update_screen(scr, total_tests)
        if not running:
            break
    sel.close()

    # Mark any VM that never exited; its process handle is dropped here.
    for i in range(num_servers):
        if not vm[i]['proc']:
            continue
        vm[i]['proc'] = None
        scr.move(i + 1, 10)
        scr.clrtoeol()
        scr.addstr("still running")
        logger.info("VM[%d] still running" % i)

    scr.refresh()
    time.sleep(0.3)

def known_output(tests, line):
    """Return True when a VM stdout line is an expected/known message.

    Known lines are empty lines, test case names, and the runner's own
    status prefixes; anything else is reported as unexpected output.
    """
    if not line:
        return True
    if line in tests:
        return True
    known = ["START ", "PASS ", "FAIL ", "SKIP ", "REASON ", "ALL-PASSED",
             "READY",
             " ", "Exception: ", "Traceback (most recent call last):",
             "./run-all.sh: running",
             "./run-all.sh: passing",
             "Test run completed", "Logfiles are at", "Starting test run",
             "passed all", "skipped ", "failed tests:"]
    for k in known:
        if line.startswith(k):
            return True
    return False

def main():
    """Parse arguments, run the parallel test execution, and report results.

    Exit status: 0 on full success, 1 when failures passed on retry,
    2 when failures remained (or retries were disabled).
    """
    global num_servers
    global vm
    global dir
    global timestamp
    global tests
    global first_run_failures
    global total_started, total_passed, total_failed, total_skipped
    global rerun_failures

    total_started = 0
    total_passed = 0
    total_failed = 0
    total_skipped = 0

    debug_level = logging.INFO
    rerun_failures = True
    timestamp = int(time.time())

    scriptsdir = os.path.dirname(os.path.realpath(sys.argv[0]))

    p = argparse.ArgumentParser(description='run multiple testing VMs in parallel')
    p.add_argument('num_servers', metavar='number of VMs', type=int, choices=range(1, 100),
                   help="number of VMs to start")
    p.add_argument('-f', dest='testmodules', metavar='<test module>',
                   help='execute only tests from these test modules',
                   type=str, nargs='+')
    p.add_argument('-1', dest='no_retry', action='store_const', const=True, default=False,
                   help="don't retry failed tests automatically")
    p.add_argument('--debug', dest='debug', action='store_const', const=True, default=False,
                   help="enable debug logging")
    p.add_argument('--codecov', dest='codecov', action='store_const', const=True, default=False,
                   help="enable code coverage collection")
    p.add_argument('--shuffle-tests', dest='shuffle', action='store_const', const=True, default=False,
                   help="shuffle test cases to randomize order")
    p.add_argument('--short', dest='short', action='store_const', const=True,
                   default=False,
                   help="only run short-duration test cases")
    p.add_argument('--long', dest='long', action='store_const', const=True,
                   default=False,
                   help="include long-duration test cases")
    p.add_argument('--valgrind', dest='valgrind', action='store_const',
                   const=True, default=False,
                   help="run tests under valgrind")
    p.add_argument('--telnet', dest='telnet', metavar='<baseport>', type=int,
                   help="enable telnet server inside VMs, specify the base port here")
    p.add_argument('--nocurses', dest='nocurses', action='store_const',
                   const=True, default=False, help="Don't use curses for output")
    p.add_argument('params', nargs='*')
    args = p.parse_args()

    dir = os.environ.get('HWSIM_TEST_LOG_DIR', '/tmp/hwsim-test-logs')
    try:
        os.makedirs(dir)
    except OSError as e:
        # The log directory may already exist; anything else is fatal.
        if e.errno != errno.EEXIST:
            raise

    num_servers = args.num_servers
    rerun_failures = not args.no_retry
    if args.debug:
        debug_level = logging.DEBUG
    extra_args = []
    if args.valgrind:
        extra_args += ['--valgrind']
    if args.long:
        extra_args += ['--long']
    if args.codecov:
        print("Code coverage - build separate binaries")
        logdir = os.path.join(dir, str(timestamp))
        os.makedirs(logdir)
        subprocess.check_call([os.path.join(scriptsdir, 'build-codecov.sh'),
                               logdir])
        codecov_args = ['--codecov_dir', logdir]
        codecov = True
    else:
        codecov_args = []
        codecov = False

    first_run_failures = []
    if args.params:
        tests = args.params
    else:
        # No explicit test list: ask run-tests.py for all available cases.
        tests = []
        cmd = [os.path.join(os.path.dirname(scriptsdir), 'run-tests.py'), '-L']
        if args.testmodules:
            cmd += ["-f"]
            cmd += args.testmodules
        lst = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        for l in lst.stdout.readlines():
            name = l.decode().split(' ')[0]
            tests.append(name)
    if len(tests) == 0:
        sys.exit("No test cases selected")

    if args.shuffle:
        from random import shuffle
        shuffle(tests)
    elif num_servers > 2 and len(tests) > 100:
        # Move test cases with long duration to the beginning as an
        # optimization to avoid last part of the test execution running a long
        # duration test case on a single VM while all other VMs have already
        # completed their work.
        for l in long_tests:
            if l in tests:
                tests.remove(l)
                tests.insert(0, l)
    if args.short:
        tests = [t for t in tests if t not in long_tests]

    logger.setLevel(debug_level)
    if not args.nocurses:
        # curses owns the terminal, so log to a file instead of stdout.
        log_handler = logging.FileHandler('parallel-vm.log')
    else:
        log_handler = logging.StreamHandler(sys.stdout)
    log_handler.setLevel(debug_level)
    fmt = "%(asctime)s %(levelname)s %(message)s"
    log_formatter = logging.Formatter(fmt)
    log_handler.setFormatter(log_formatter)
    logger.addHandler(log_handler)

    vm = {}
    for i in range(0, num_servers):
        cmd = [os.path.join(scriptsdir, 'vm-run.sh'),
               '--timestamp', str(timestamp),
               '--ext', 'srv.%d' % (i + 1),
               '-i'] + codecov_args + extra_args
        if args.telnet:
            cmd += ['--telnet', str(args.telnet + i)]
        vm[i] = {}
        vm[i]['idx'] = i
        vm[i]['starting'] = False
        vm[i]['started'] = False
        vm[i]['cmd'] = cmd
        vm[i]['proc'] = None
        vm[i]['out'] = ""
        vm[i]['pending'] = ""
        vm[i]['err'] = ""
        vm[i]['failed'] = []
        vm[i]['fail_seq'] = []
        vm[i]['skip_reason'] = []
    print('')

    if not args.nocurses:
        curses.wrapper(show_progress)
    else:
        # Minimal stand-in implementing the curses calls show_progress()
        # makes, so the same code path works without a terminal UI.
        class FakeScreen:
            def leaveok(self, n):
                pass
            def refresh(self):
                pass
            def addstr(self, *args, **kw):
                pass
            def move(self, x, y):
                pass
            def clrtoeol(self):
                pass
        show_progress(FakeScreen())

    with open('{}/{}-parallel.log'.format(dir, timestamp), 'w') as f:
        for i in range(0, num_servers):
            f.write('VM {}\n{}\n{}\n'.format(i + 1, vm[i]['out'], vm[i]['err']))
        first = True
        for i in range(0, num_servers):
            for line in vm[i]['out'].splitlines():
                if line.startswith("FAIL "):
                    if first:
                        first = False
                        print("Logs for failed test cases:")
                        f.write("Logs for failed test cases:\n")
                    fname = "%s/%d.srv.%d/%s.log" % (dir, timestamp, i + 1,
                                                     line.split(' ')[1])
                    print(fname)
                    f.write("%s\n" % fname)

    failed = get_failed(vm)

    if first_run_failures:
        print("To re-run same failure sequence(s):")
        for i in range(0, num_servers):
            if len(vm[i]['failed']) == 0:
                continue
            print("./vm-run.sh", end=' ')
            if args.long:
                print("--long", end=' ')
            # Print at most the last 30 test cases leading to the failure.
            skip = len(vm[i]['fail_seq'])
            skip -= min(skip, 30)
            for t in vm[i]['fail_seq']:
                if skip > 0:
                    skip -= 1
                    continue
                print(t, end=' ')
            print('')
        print("Failed test cases:")
        for f in first_run_failures:
            print(f, end=' ')
            logger.info("Failed: " + f)
        print('')
    # Test cases that failed both on the first run and on the retry.
    double_failed = []
    for name in failed:
        double_failed.append(name)
    for test in first_run_failures:
        double_failed.remove(test)
    if not rerun_failures:
        pass
    elif failed and not double_failed:
        print("All failed cases passed on retry")
        logger.info("All failed cases passed on retry")
    elif double_failed:
        print("Failed even on retry:")
        for f in double_failed:
            print(f, end=' ')
            logger.info("Failed on retry: " + f)
        print('')
    res = "TOTAL={} PASS={} FAIL={} SKIP={}".format(total_started,
                                                    total_passed,
                                                    total_failed,
                                                    total_skipped)
    print(res)
    logger.info(res)
    print("Logs: " + dir + '/' + str(timestamp))
    logger.info("Logs: " + dir + '/' + str(timestamp))

    skip_reason = []
    for i in range(num_servers):
        if not vm[i]['started']:
            continue
        skip_reason += vm[i]['skip_reason']
        if len(vm[i]['pending']) > 0:
            logger.info("Unprocessed stdout from VM[%d]: '%s'" %
                        (i, vm[i]['pending']))
        log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
        with open(log, 'r') as f:
            if "Kernel panic" in f.read():
                print("Kernel panic in " + log)
                logger.info("Kernel panic in " + log)
    # Map well-known skip reasons to the missing build/runtime feature.
    missing = {}
    missing['OCV not supported'] = 'OCV'
    missing['sigma_dut not available'] = 'sigma_dut'
    missing['Skip test case with long duration due to --long not specified'] = 'long'
    missing['TEST_ALLOC_FAIL not supported'] = 'TEST_FAIL'
    missing['TEST_ALLOC_FAIL not supported in the build'] = 'TEST_FAIL'
    missing['TEST_FAIL not supported'] = 'TEST_FAIL'
    missing['veth not supported (kernel CONFIG_VETH)'] = 'KERNEL:CONFIG_VETH'
    missing['WPA-EAP-SUITE-B-192 not supported'] = 'CONFIG_SUITEB192'
    missing['WPA-EAP-SUITE-B not supported'] = 'CONFIG_SUITEB'
    missing['wmediumd not available'] = 'wmediumd'
    missing['DPP not supported'] = 'CONFIG_DPP'
    missing['DPP version 2 not supported'] = 'CONFIG_DPP2'
    missing['EAP method PWD not supported in the build'] = 'CONFIG_EAP_PWD'
    missing['EAP method TEAP not supported in the build'] = 'CONFIG_EAP_TEAP'
    missing['FILS not supported'] = 'CONFIG_FILS'
    missing['FILS-SK-PFS not supported'] = 'CONFIG_FILS_SK_PFS'
    missing['OWE not supported'] = 'CONFIG_OWE'
    missing['SAE not supported'] = 'CONFIG_SAE'
    missing['Not using OpenSSL'] = 'CONFIG_TLS=openssl'
    missing['wpa_supplicant TLS library is not OpenSSL: internal'] = 'CONFIG_TLS=openssl'
    missing_items = []
    other_reasons = []
    for reason in sorted(set(skip_reason)):
        if reason in missing:
            missing_items.append(missing[reason])
        elif reason.startswith('OCSP-multi not supported with this TLS library'):
            missing_items.append('OCSP-MULTI')
        else:
            other_reasons.append(reason)
    if missing_items:
        print("Missing items (SKIP):", missing_items)
    if other_reasons:
        print("Other skip reasons:", other_reasons)

    for i in range(num_servers):
        unknown = ""
        for line in vm[i]['out'].splitlines():
            if not known_output(tests, line):
                unknown += line + "\n"
        if unknown:
            print("\nVM %d - unexpected stdout output:\n%s" % (i, unknown))
        if vm[i]['err']:
            print("\nVM %d - unexpected stderr output:\n%s\n" % (i, vm[i]['err']))

    if codecov:
        print("Code coverage - preparing report")
        for i in range(num_servers):
            subprocess.check_call([os.path.join(scriptsdir,
                                                'process-codecov.sh'),
                                   logdir + ".srv.%d" % (i + 1),
                                   str(i)])
        subprocess.check_call([os.path.join(scriptsdir, 'combine-codecov.sh'),
                               logdir])
        print("file://%s/index.html" % logdir)
        logger.info("Code coverage report: file://%s/index.html" % logdir)

    if double_failed or (failed and not rerun_failures):
        logger.info("Test run complete - failures found")
        sys.exit(2)
    if failed:
        logger.info("Test run complete - failures found on first run; passed on retry")
        sys.exit(1)
    logger.info("Test run complete - no failures")
    sys.exit(0)

if __name__ == "__main__":
    main()