1#!/usr/bin/env python3
2#
3# Parallel VM test case executor
4# Copyright (c) 2014-2019, Jouni Malinen <j@w1.fi>
5#
6# This software may be distributed under the terms of the BSD license.
7# See README for more details.
8
9from __future__ import print_function
10import curses
11import fcntl
12import logging
13import multiprocessing
14import os
15import selectors
16import subprocess
17import sys
18import time
19import errno
20
21logger = logging.getLogger()
22
# Test cases that take significantly longer time to execute than average.
# When enough VMs are available, these are moved to the front of the test
# queue (see main()) so the run does not end with a single VM executing a
# long test case while all other VMs are already idle. With --short, these
# test cases are excluded entirely.
long_tests = ["ap_roam_open",
              "wpas_mesh_password_mismatch_retry",
              "wpas_mesh_password_mismatch",
              "hostapd_oom_wpa2_psk_connect",
              "ap_hs20_fetch_osu_stop",
              "ap_roam_wpa2_psk",
              "ibss_wpa_none_ccmp",
              "nfc_wps_er_handover_pk_hash_mismatch_sta",
              "go_neg_peers_force_diff_freq",
              "p2p_cli_invite",
              "sta_ap_scan_2b",
              "ap_pmf_sta_unprot_deauth_burst",
              "ap_bss_add_remove_during_ht_scan",
              "wext_scan_hidden",
              "autoscan_exponential",
              "nfc_p2p_client",
              "wnm_bss_keep_alive",
              "ap_inactivity_disconnect",
              "scan_bss_expiration_age",
              "autoscan_periodic",
              "discovery_group_client",
              "concurrent_p2pcli",
              "ap_bss_add_remove",
              "wpas_ap_wps",
              "wext_pmksa_cache",
              "ibss_wpa_none",
              "ap_ht_40mhz_intolerant_ap",
              "ibss_rsn",
              "discovery_pd_retries",
              "ap_wps_setup_locked_timeout",
              "ap_vht160",
              'he160',
              'he160b',
              "dfs_radar",
              "dfs",
              "dfs_ht40_minus",
              "dfs_etsi",
              "dfs_radar_vht80_downgrade",
              "ap_acs_dfs",
              "grpform_cred_ready_timeout",
              "hostapd_oom_wpa2_eap_connect",
              "wpas_ap_dfs",
              "autogo_many",
              "hostapd_oom_wpa2_eap",
              "ibss_open",
              "proxyarp_open_ebtables",
              "proxyarp_open_ebtables_ipv6",
              "radius_failover",
              "obss_scan_40_intolerant",
              "dbus_connect_oom",
              "proxyarp_open",
              "proxyarp_open_ipv6",
              "ap_wps_iteration",
              "ap_wps_iteration_error",
              "ap_wps_pbc_timeout",
              "ap_wps_pbc_ap_timeout",
              "ap_wps_pin_ap_timeout",
              "ap_wps_http_timeout",
              "p2p_go_move_reg_change",
              "p2p_go_move_active",
              "p2p_go_move_scm",
              "p2p_go_move_scm_peer_supports",
              "p2p_go_move_scm_peer_does_not_support",
              "p2p_go_move_scm_multi"]
88
def get_failed(vm):
    """Return the aggregated list of failed test case names from all VMs.

    Iterates the per-VM state dict directly instead of relying on the
    global num_servers; vm is keyed 0..n-1 in insertion order, so the
    resulting order is unchanged.
    """
    failed = []
    for data in vm.values():
        failed += data['failed']
    return failed
94
def vm_read_stdout(vm, test_queue):
    """Consume available stdout from one VM and update result counters.

    Newly read output is appended to the VM's 'pending' buffer and every
    complete line is processed: READY/PASS/FAIL/NOT-FOUND/SKIP/REASON/START
    status lines from the test runner inside the VM drive the global
    counters and the per-VM bookkeeping. Failed test cases may be requeued
    once on test_queue when retries are enabled.

    Returns True when the VM is ready for its next command (it reported
    READY or completed a test case), False otherwise.
    """
    global total_started, total_passed, total_failed, total_skipped
    global rerun_failures
    global first_run_failures

    ready = False
    try:
        out = vm['proc'].stdout.read()
        # Use identity comparison for the None singleton (PEP 8); a pipe in
        # non-blocking mode can report "no data" as a None read result.
        if out is None:
            return False
        out = out.decode()
    except IOError as e:
        # EAGAIN just means the non-blocking pipe had nothing to read.
        if e.errno == errno.EAGAIN:
            return False
        raise
    logger.debug("VM[%d] stdout.read[%s]" % (vm['idx'], out.rstrip()))
    pending = vm['pending'] + out
    lines = []
    while True:
        pos = pending.find('\n')
        if pos < 0:
            # Partial line; keep it buffered until more output arrives.
            break
        line = pending[0:pos].rstrip()
        pending = pending[(pos + 1):]
        logger.debug("VM[%d] stdout full line[%s]" % (vm['idx'], line))
        if line.startswith("READY"):
            vm['starting'] = False
            vm['started'] = True
            ready = True
        elif line.startswith("PASS"):
            ready = True
            total_passed += 1
        elif line.startswith("FAIL"):
            ready = True
            total_failed += 1
            vals = line.split(' ')
            if len(vals) < 2:
                logger.info("VM[%d] incomplete FAIL line: %s" % (vm['idx'],
                                                                 line))
                name = line
            else:
                name = vals[1]
            logger.debug("VM[%d] test case failed: %s" % (vm['idx'], name))
            vm['failed'].append(name)
            if name != vm['current_name']:
                logger.info("VM[%d] test result mismatch: %s (expected %s)" % (vm['idx'], name, vm['current_name']))
            else:
                count = vm['current_count']
                if count == 0:
                    first_run_failures.append(name)
                # Requeue for a single retry; count > 0 means this failure
                # already was a retry.
                if rerun_failures and count < 1:
                    logger.debug("Requeue test case %s" % name)
                    test_queue.append((name, vm['current_count'] + 1))
        elif line.startswith("NOT-FOUND"):
            ready = True
            total_failed += 1
            logger.info("VM[%d] test case not found" % vm['idx'])
        elif line.startswith("SKIP"):
            ready = True
            total_skipped += 1
        elif line.startswith("REASON"):
            # Strip the "REASON " prefix (7 characters) and keep the text.
            vm['skip_reason'].append(line[7:])
        elif line.startswith("START"):
            total_started += 1
            # Until the first failure, record the sequence of started test
            # cases so a failing sequence can be re-run later (see main()).
            if len(vm['failed']) == 0:
                vals = line.split(' ')
                if len(vals) >= 2:
                    vm['fail_seq'].append(vals[1])
        vm['out'] += line + '\n'
        lines.append(line)
    vm['pending'] = pending
    return ready
167
def start_vm(vm, sel):
    """Launch the VM subprocess and watch its output pipes.

    Starts vm['cmd'] with piped stdin/stdout/stderr, switches both output
    pipes to non-blocking mode, and registers them with the selector so
    the main loop wakes up whenever the VM produces output. vm['cmd'] is
    cleared afterwards to mark the VM as having been started once.
    """
    logger.info("VM[%d] starting up" % (vm['idx'] + 1))
    vm['starting'] = True
    proc = subprocess.Popen(vm['cmd'],
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    vm['proc'] = proc
    vm['cmd'] = None
    for pipe in (proc.stdout, proc.stderr):
        fileno = pipe.fileno()
        flags = fcntl.fcntl(fileno, fcntl.F_GETFL)
        # Reads must never block; the event loop polls many VMs at once.
        fcntl.fcntl(fileno, fcntl.F_SETFL, flags | os.O_NONBLOCK)
        sel.register(pipe, selectors.EVENT_READ, vm)
181
def num_vm_starting():
    """Return how many VMs are currently in the 'starting' state."""
    return sum(1 for i in range(num_servers) if vm[i]['starting'])
188
def vm_read_stderr(vm):
    """Append any available stderr output from the VM to its 'err' buffer.

    The stream is in non-blocking mode, so an IOError with EAGAIN simply
    means there is nothing to read right now; any other error is re-raised.
    """
    try:
        err = vm['proc'].stderr.read()
        # Identity comparison for the None singleton (PEP 8).
        if err is not None:
            err = err.decode()
            if len(err) > 0:
                vm['err'] += err
                logger.info("VM[%d] stderr.read[%s]" % (vm['idx'], err))
    except IOError as e:
        if e.errno != errno.EAGAIN:
            raise
200
def vm_next_step(_vm, scr, test_queue):
    """Hand the next queued test case to this VM, or shut it down.

    Pops the head of test_queue and writes the test case name to the VM's
    stdin; an empty line is sent instead when the queue is exhausted, which
    tells the runner inside the VM to exit. The status column for this VM
    on the screen is updated accordingly.
    """
    scr.move(_vm['idx'] + 1, 10)
    scr.clrtoeol()
    if test_queue:
        name, count = test_queue.pop(0)
        _vm['current_name'] = name
        _vm['current_count'] = count
        _vm['proc'].stdin.write(name.encode() + b'\n')
        _vm['proc'].stdin.flush()
        scr.addstr(name)
        logger.debug("VM[%d] start test %s" % (_vm['idx'], name))
    else:
        # Empty input line asks the VM-side runner to terminate.
        _vm['proc'].stdin.write(b'\n')
        _vm['proc'].stdin.flush()
        scr.addstr("shutting down")
        logger.info("VM[%d] shutting down" % _vm['idx'])
217
def check_vm_start(scr, sel, test_queue):
    """Start at most one more VM if there is enough queued work.

    Returns a (running, updated) tuple: running is True while at least one
    VM process is alive (or a new one was just started), updated is True
    when the screen needs a refresh because a VM was started.
    """
    # Limit the number of simultaneously booting VMs to avoid overloading
    # the host; this is loop-invariant, so compute it once. Floor division
    # keeps the limit an integer (plain '/' would produce a float under
    # Python 3).
    max_start = multiprocessing.cpu_count()
    if max_start > 4:
        max_start //= 2

    running = False
    for i in range(num_servers):
        if vm[i]['proc']:
            running = True
            continue

        # Either not yet started or already stopped VM. Start it only if
        # it has never run (cmd still set) and there is more queued work
        # than VMs already booting.
        num_starting = num_vm_starting()
        if vm[i]['cmd'] and len(test_queue) > num_starting and \
           num_starting < max_start:
            scr.move(i + 1, 10)
            scr.clrtoeol()
            scr.addstr(i + 1, 10, "starting VM")
            start_vm(vm[i], sel)
            # Start at most one VM per call.
            return True, True

    return running, False
239
def vm_terminated(_vm, scr, sel, test_queue):
    """Clean up after a VM process has exited.

    Unregisters the VM's streams from the selector, scans its console log
    for kernel panics, and flags an unexpected exit when work remains
    queued but too few VMs are left running to handle it.

    Returns True when the screen should be refreshed.
    """
    updated = False
    for stream in [_vm['proc'].stdout, _vm['proc'].stderr]:
        sel.unregister(stream)
    _vm['proc'] = None
    scr.move(_vm['idx'] + 1, 10)
    scr.clrtoeol()
    log = '{}/{}.srv.{}/console'.format(dir, timestamp, _vm['idx'] + 1)
    with open(log, 'r') as f:
        if "Kernel panic" in f.read():
            scr.addstr("kernel panic")
            logger.info("VM[%d] kernel panic" % _vm['idx'])
            updated = True
    if test_queue:
        # Count the VMs that are still running. The original code tested
        # _vm['proc'] here, which was just cleared above, so num_vm was
        # always 0 and every exit with queued work was reported as
        # unexpected.
        num_vm = 0
        for i in range(num_servers):
            if vm[i]['proc']:
                num_vm += 1
        if len(test_queue) > num_vm:
            scr.addstr("unexpected exit")
            # Log the index of the VM that exited, not the loop counter.
            logger.info("VM[%d] unexpected exit" % _vm['idx'])
            updated = True
    return updated
263
def update_screen(scr, total_tests):
    """Redraw the totals line and the failed-test-case line of the screen."""
    completed = total_passed + total_failed + total_skipped
    scr.move(num_servers + 1, 10)
    scr.clrtoeol()
    scr.addstr("{} %".format(int(100.0 * completed / total_tests)))
    scr.addstr(num_servers + 1, 20,
               "TOTAL={} STARTED={} PASS={} FAIL={} SKIP={}".format(total_tests, total_started, total_passed, total_failed, total_skipped))
    failed = get_failed(vm)
    if failed:
        scr.move(num_servers + 2, 0)
        scr.clrtoeol()
        scr.addstr("Failed test cases: ")
        for count, name in enumerate(failed, 1):
            # Cap the listing at 30 names to keep the line readable.
            if count > 30:
                scr.addstr('...')
                scr.clrtoeol()
                break
            scr.addstr(name)
            scr.addstr(' ')
    scr.refresh()
285
def show_progress(scr):
    """Main event loop: drive all VMs, dispatch queued test cases, and keep
    the status screen (curses or the FakeScreen stand-in) updated until
    every VM process has exited."""
    global num_servers
    global vm
    global dir
    global timestamp
    global tests
    global first_run_failures
    global total_started, total_passed, total_failed, total_skipped
    global rerun_failures

    sel = selectors.DefaultSelector()
    total_tests = len(tests)
    logger.info("Total tests: %d" % total_tests)
    # Queue entries are (test name, retry count); count 0 is the first run.
    test_queue = [(t, 0) for t in tests]
    # Start only the first VM here; check_vm_start() below brings up the
    # rest gradually so the host is not overloaded by parallel boots.
    start_vm(vm[0], sel)

    scr.leaveok(1)
    scr.addstr(0, 0, "Parallel test execution status", curses.A_BOLD)
    for i in range(0, num_servers):
        scr.addstr(i + 1, 0, "VM %d:" % (i + 1), curses.A_BOLD)
        status = "starting VM" if vm[i]['proc'] else "not yet started"
        scr.addstr(i + 1, 10, status)
    scr.addstr(num_servers + 1, 0, "Total:", curses.A_BOLD)
    scr.addstr(num_servers + 1, 20, "TOTAL={} STARTED=0 PASS=0 FAIL=0 SKIP=0".format(total_tests))
    scr.refresh()

    while True:
        updated = False
        events = sel.select(timeout=1)
        for key, mask in events:
            # The VM state dict was attached as selector data when the
            # stream was registered in start_vm().
            _vm = key.data
            if not _vm['proc']:
                # Stream belongs to a VM that has already terminated.
                continue
            # Drain stderr both before and after processing stdout so that
            # error output emitted around a status line is not left pending.
            vm_read_stderr(_vm)
            if vm_read_stdout(_vm, test_queue):
                vm_next_step(_vm, scr, test_queue)
                updated = True
            vm_read_stderr(_vm)
            if _vm['proc'].poll() is not None:
                if vm_terminated(_vm, scr, sel, test_queue):
                    updated = True

        running, run_update = check_vm_start(scr, sel, test_queue)
        if updated or run_update:
            update_screen(scr, total_tests)
        if not running:
            break
    sel.close()

    # Any VM still marked as running at this point did not exit cleanly.
    for i in range(num_servers):
        if not vm[i]['proc']:
            continue
        vm[i]['proc'] = None
        scr.move(i + 1, 10)
        scr.clrtoeol()
        scr.addstr("still running")
        logger.info("VM[%d] still running" % i)

    scr.refresh()
    time.sleep(0.3)
346
def known_output(tests, line):
    """Return True when *line* is an expected piece of VM stdout output.

    Empty lines, exact test case names, and lines starting with any of the
    recognized status/runner prefixes are considered known; everything else
    is unexpected and gets reported by the caller.
    """
    if not line:
        return True
    if line in tests:
        return True
    prefixes = ("START ", "PASS ", "FAIL ", "SKIP ", "REASON ", "ALL-PASSED",
                "READY",
                "  ", "Exception: ", "Traceback (most recent call last):",
                "./run-all.sh: running",
                "./run-all.sh: passing",
                "Test run completed", "Logfiles are at", "Starting test run",
                "passed all", "skipped ", "failed tests:")
    # str.startswith accepts a tuple of prefixes, replacing the manual loop.
    return line.startswith(prefixes)
363
def main():
    """Command-line entry point.

    Parses arguments, builds the list of test cases, runs them across the
    requested number of VMs, writes the combined logs, and exits with a
    status reflecting the overall result (0 = all passed, 1 = failures
    that passed on retry, 2 = persistent failures).
    """
    import argparse
    import os
    global num_servers
    global vm
    global dir
    global timestamp
    global tests
    global first_run_failures
    global total_started, total_passed, total_failed, total_skipped
    global rerun_failures

    total_started = 0
    total_passed = 0
    total_failed = 0
    total_skipped = 0

    debug_level = logging.INFO
    rerun_failures = True
    # One timestamp names all per-VM log directories for this run.
    timestamp = int(time.time())

    scriptsdir = os.path.dirname(os.path.realpath(sys.argv[0]))

    p = argparse.ArgumentParser(description='run multiple testing VMs in parallel')
    p.add_argument('num_servers', metavar='number of VMs', type=int, choices=range(1, 100),
                   help="number of VMs to start")
    p.add_argument('-f', dest='testmodules', metavar='<test module>',
                   help='execute only tests from these test modules',
                   type=str, nargs='+')
    p.add_argument('-1', dest='no_retry', action='store_const', const=True, default=False,
                   help="don't retry failed tests automatically")
    p.add_argument('--debug', dest='debug', action='store_const', const=True, default=False,
                   help="enable debug logging")
    p.add_argument('--codecov', dest='codecov', action='store_const', const=True, default=False,
                   help="enable code coverage collection")
    p.add_argument('--shuffle-tests', dest='shuffle', action='store_const', const=True, default=False,
                   help="shuffle test cases to randomize order")
    p.add_argument('--short', dest='short', action='store_const', const=True,
                   default=False,
                   help="only run short-duration test cases")
    p.add_argument('--long', dest='long', action='store_const', const=True,
                   default=False,
                   help="include long-duration test cases")
    p.add_argument('--valgrind', dest='valgrind', action='store_const',
                   const=True, default=False,
                   help="run tests under valgrind")
    p.add_argument('--telnet', dest='telnet', metavar='<baseport>', type=int,
                   help="enable telnet server inside VMs, specify the base port here")
    p.add_argument('--nocurses', dest='nocurses', action='store_const',
                   const=True, default=False, help="Don't use curses for output")
    p.add_argument('params', nargs='*')
    args = p.parse_args()

    # Make sure the top-level log directory exists; it is fine if it does.
    dir = os.environ.get('HWSIM_TEST_LOG_DIR', '/tmp/hwsim-test-logs')
    try:
        os.makedirs(dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    num_servers = args.num_servers
    rerun_failures = not args.no_retry
    if args.debug:
        debug_level = logging.DEBUG
    extra_args = []
    if args.valgrind:
        extra_args += ['--valgrind']
    if args.long:
        extra_args += ['--long']
    if args.codecov:
        # Code coverage requires separately built binaries; prepare them and
        # a dedicated directory for the coverage data.
        print("Code coverage - build separate binaries")
        logdir = os.path.join(dir, str(timestamp))
        os.makedirs(logdir)
        subprocess.check_call([os.path.join(scriptsdir, 'build-codecov.sh'),
                               logdir])
        codecov_args = ['--codecov_dir', logdir]
        codecov = True
    else:
        codecov_args = []
        codecov = False

    first_run_failures = []
    if args.params:
        # Explicit test case names were given on the command line.
        tests = args.params
    else:
        # Ask run-tests.py for the list of available test cases; each output
        # line starts with the test case name.
        tests = []
        cmd = [os.path.join(os.path.dirname(scriptsdir), 'run-tests.py'), '-L']
        if args.testmodules:
            cmd += ["-f"]
            cmd += args.testmodules
        lst = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        for l in lst.stdout.readlines():
            name = l.decode().split(' ')[0]
            tests.append(name)
    if len(tests) == 0:
        sys.exit("No test cases selected")

    if args.shuffle:
        from random import shuffle
        shuffle(tests)
    elif num_servers > 2 and len(tests) > 100:
        # Move test cases with long duration to the beginning as an
        # optimization to avoid last part of the test execution running a long
        # duration test case on a single VM while all other VMs have already
        # completed their work.
        for l in long_tests:
            if l in tests:
                tests.remove(l)
                tests.insert(0, l)
    if args.short:
        tests = [t for t in tests if t not in long_tests]

    # Log to a file when the curses UI owns the terminal; otherwise stdout.
    logger.setLevel(debug_level)
    if not args.nocurses:
        log_handler = logging.FileHandler('parallel-vm.log')
    else:
        log_handler = logging.StreamHandler(sys.stdout)
    log_handler.setLevel(debug_level)
    fmt = "%(asctime)s %(levelname)s %(message)s"
    log_formatter = logging.Formatter(fmt)
    log_handler.setFormatter(log_formatter)
    logger.addHandler(log_handler)

    # Per-VM state: subprocess handle, buffered output, and result
    # bookkeeping for each VM, keyed by VM index.
    vm = {}
    for i in range(0, num_servers):
        cmd = [os.path.join(scriptsdir, 'vm-run.sh'),
               '--timestamp', str(timestamp),
               '--ext', 'srv.%d' % (i + 1),
               '-i'] + codecov_args + extra_args
        if args.telnet:
            cmd += ['--telnet', str(args.telnet + i)]
        vm[i] = {}
        vm[i]['idx'] = i
        vm[i]['starting'] = False
        vm[i]['started'] = False
        vm[i]['cmd'] = cmd
        vm[i]['proc'] = None
        vm[i]['out'] = ""
        vm[i]['pending'] = ""
        vm[i]['err'] = ""
        vm[i]['failed'] = []
        vm[i]['fail_seq'] = []
        vm[i]['skip_reason'] = []
    print('')

    if not args.nocurses:
        curses.wrapper(show_progress)
    else:
        # Minimal stand-in for a curses screen when curses output is
        # disabled; all drawing operations become no-ops.
        class FakeScreen:
            def leaveok(self, n):
                pass
            def refresh(self):
                pass
            def addstr(self, *args, **kw):
                pass
            def move(self, x, y):
                pass
            def clrtoeol(self):
                pass
        show_progress(FakeScreen())

    # Write the combined per-VM stdout/stderr log and list the log files of
    # all failed test cases.
    with open('{}/{}-parallel.log'.format(dir, timestamp), 'w') as f:
        for i in range(0, num_servers):
            f.write('VM {}\n{}\n{}\n'.format(i + 1, vm[i]['out'], vm[i]['err']))
        first = True
        for i in range(0, num_servers):
            for line in vm[i]['out'].splitlines():
                if line.startswith("FAIL "):
                    if first:
                        first = False
                        print("Logs for failed test cases:")
                        f.write("Logs for failed test cases:\n")
                    fname = "%s/%d.srv.%d/%s.log" % (dir, timestamp, i + 1,
                                                     line.split(' ')[1])
                    print(fname)
                    f.write("%s\n" % fname)

    failed = get_failed(vm)

    if first_run_failures:
        # Show how to reproduce each failure with the same test sequence,
        # limited to the last 30 started test cases per VM.
        print("To re-run same failure sequence(s):")
        for i in range(0, num_servers):
            if len(vm[i]['failed']) == 0:
                continue
            print("./vm-run.sh", end=' ')
            if args.long:
                print("--long", end=' ')
            skip = len(vm[i]['fail_seq'])
            skip -= min(skip, 30)
            for t in vm[i]['fail_seq']:
                if skip > 0:
                    skip -= 1
                    continue
                print(t, end=' ')
            print('')
        print("Failed test cases:")
        for f in first_run_failures:
            print(f, end=' ')
            logger.info("Failed: " + f)
        print('')
    # What remains in 'failed' after removing one occurrence per first-run
    # failure are the test cases that failed on retry as well.
    double_failed = []
    for name in failed:
        double_failed.append(name)
    for test in first_run_failures:
        double_failed.remove(test)
    if not rerun_failures:
        pass
    elif failed and not double_failed:
        print("All failed cases passed on retry")
        logger.info("All failed cases passed on retry")
    elif double_failed:
        print("Failed even on retry:")
        for f in double_failed:
            print(f, end=' ')
            logger.info("Failed on retry: " + f)
        print('')
    res = "TOTAL={} PASS={} FAIL={} SKIP={}".format(total_started,
                                                    total_passed,
                                                    total_failed,
                                                    total_skipped)
    print(res)
    logger.info(res)
    print("Logs: " + dir + '/' + str(timestamp))
    logger.info("Logs: " + dir + '/' + str(timestamp))

    # Collect skip reasons from VMs that actually started and report any
    # kernel panics found in the per-VM console logs.
    skip_reason = []
    for i in range(num_servers):
        if not vm[i]['started']:
            continue
        skip_reason += vm[i]['skip_reason']
        if len(vm[i]['pending']) > 0:
            logger.info("Unprocessed stdout from VM[%d]: '%s'" %
                        (i, vm[i]['pending']))
        log = '{}/{}.srv.{}/console'.format(dir, timestamp, i + 1)
        with open(log, 'r') as f:
            if "Kernel panic" in f.read():
                print("Kernel panic in " + log)
                logger.info("Kernel panic in " + log)
    # Map well-known skip reasons to the missing build/configuration item;
    # anything unrecognized is reported verbatim below.
    missing = {}
    missing['OCV not supported'] = 'OCV'
    missing['sigma_dut not available'] = 'sigma_dut'
    missing['Skip test case with long duration due to --long not specified'] = 'long'
    missing['TEST_ALLOC_FAIL not supported' ] = 'TEST_FAIL'
    missing['TEST_ALLOC_FAIL not supported in the build'] = 'TEST_FAIL'
    missing['TEST_FAIL not supported' ] = 'TEST_FAIL'
    missing['veth not supported (kernel CONFIG_VETH)'] = 'KERNEL:CONFIG_VETH'
    missing['WPA-EAP-SUITE-B-192 not supported'] = 'CONFIG_SUITEB192'
    missing['WPA-EAP-SUITE-B not supported'] = 'CONFIG_SUITEB'
    missing['wmediumd not available'] = 'wmediumd'
    missing['DPP not supported'] = 'CONFIG_DPP'
    missing['DPP version 2 not supported'] = 'CONFIG_DPP2'
    missing['EAP method PWD not supported in the build'] = 'CONFIG_EAP_PWD'
    missing['EAP method TEAP not supported in the build'] = 'CONFIG_EAP_TEAP'
    missing['FILS not supported'] = 'CONFIG_FILS'
    missing['FILS-SK-PFS not supported'] = 'CONFIG_FILS_SK_PFS'
    missing['OWE not supported'] = 'CONFIG_OWE'
    missing['SAE not supported'] = 'CONFIG_SAE'
    missing['Not using OpenSSL'] = 'CONFIG_TLS=openssl'
    missing['wpa_supplicant TLS library is not OpenSSL: internal'] = 'CONFIG_TLS=openssl'
    missing_items = []
    other_reasons = []
    for reason in sorted(set(skip_reason)):
        if reason in missing:
            missing_items.append(missing[reason])
        elif reason.startswith('OCSP-multi not supported with this TLS library'):
            missing_items.append('OCSP-MULTI')
        else:
            other_reasons.append(reason)
    if missing_items:
        print("Missing items (SKIP):", missing_items)
    if other_reasons:
        print("Other skip reasons:", other_reasons)

    # Flag any VM output that does not match the expected line patterns.
    for i in range(num_servers):
        unknown = ""
        for line in vm[i]['out'].splitlines():
            if not known_output(tests, line):
                unknown += line + "\n"
        if unknown:
            print("\nVM %d - unexpected stdout output:\n%s" % (i, unknown))
        if vm[i]['err']:
            print("\nVM %d - unexpected stderr output:\n%s\n" % (i, vm[i]['err']))

    if codecov:
        # Merge the per-VM coverage data and generate the HTML report.
        print("Code coverage - preparing report")
        for i in range(num_servers):
            subprocess.check_call([os.path.join(scriptsdir,
                                                'process-codecov.sh'),
                                   logdir + ".srv.%d" % (i + 1),
                                   str(i)])
        subprocess.check_call([os.path.join(scriptsdir, 'combine-codecov.sh'),
                               logdir])
        print("file://%s/index.html" % logdir)
        logger.info("Code coverage report: file://%s/index.html" % logdir)

    # Exit status: 2 = persistent failures, 1 = failures that passed on
    # retry, 0 = clean run.
    if double_failed or (failed and not rerun_failures):
        logger.info("Test run complete - failures found")
        sys.exit(2)
    if failed:
        logger.info("Test run complete - failures found on first run; passed on retry")
        sys.exit(1)
    logger.info("Test run complete - no failures")
    sys.exit(0)
667
# Run the parallel executor when invoked as a script.
if __name__ == "__main__":
    main()
670