xref: /qemu/scripts/device-crash-test (revision 47b43acd)
1#!/usr/bin/env python3
2#
3#  Copyright (c) 2017 Red Hat Inc
4#
5# Author:
6#  Eduardo Habkost <ehabkost@redhat.com>
7#
8# This program is free software; you can redistribute it and/or modify
9# it under the terms of the GNU General Public License as published by
10# the Free Software Foundation; either version 2 of the License, or
11# (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License along
19# with this program; if not, write to the Free Software Foundation, Inc.,
20# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21
22"""
23Run QEMU with all combinations of -machine and -device types,
24check for crashes and unexpected errors.
25"""
26
27import os
28import sys
29import glob
30import logging
31import traceback
32import re
33import random
34import argparse
35from itertools import chain
36
37sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'python'))
38from qemu.machine import QEMUMachine
39
# Logger shared by every function in this script.
logger = logging.getLogger('device-crash-test')
# Shorthand for emitting debug-level messages through the logger above.
dbg = logger.debug
42
43
# Purposes of the following rule list:
# * Avoiding verbose log messages when we find known non-fatal
#   (exitcode=1) errors
# * Avoiding fatal errors when we find known crashes
# * Skipping machines/devices that are known not to work out of
#   the box, when running in --quick mode
#
# Keeping the rule list updated is desirable, but not required,
# because unexpected cases where QEMU exits with exitcode=1 will
# just trigger an INFO message.
#
# Rules are tried in list order and the first rule that matches a
# failure wins, so the two catch-all rules must stay at the end.

# Valid error rule keys:
# * accel: regexp, full match only
# * machine: regexp, full match only
# * device: regexp, full match only
# * log: regexp, partial match allowed
# * exitcode: if not present, defaults to 1. If None, matches any exitcode
# * warn: if True, matching failures will be logged as warnings
#   (NOTE(review): no code in this script reads this key -- confirm
#   whether it is still needed)
# * expected: if True, QEMU is expected to always fail every time
#   when testing the corresponding test case
# * loglevel: log level of log output when there's a match
#   (logging.DEBUG when not present)
# * fatal: if True, a matching failure is recorded as a fatal failure
#   and makes the script exit with a non-zero status
ERROR_RULE_LIST = [
    # Machines that won't work out of the box:
    #             MACHINE                         | ERROR MESSAGE
    {'machine':'niagara', 'expected':True},       # Unable to load a firmware for -M niagara
    {'machine':'boston', 'expected':True},        # Please provide either a -kernel or -bios argument
    {'machine':'leon3_generic', 'expected':True}, # Can't read bios image (null)

    # devices that don't work out of the box because they require extra options to "-device DEV":
    #            DEVICE                                    | ERROR MESSAGE
    {'device':'.*-(i386|x86_64)-cpu', 'expected':True},    # CPU socket-id is not set
    {'device':'icp', 'expected':True},                     # icp_realize: required link 'xics' not found: Property '.xics' not found
    {'device':'ics', 'expected':True},                     # ics_base_realize: required link 'xics' not found: Property '.xics' not found
    # "-device ide-cd" does work on more recent QEMU versions, so it doesn't have expected=True
    {'device':'ide-cd'},                                 # No drive specified
    {'device':'ide-hd', 'expected':True},                  # No drive specified
    {'device':'ipmi-bmc-extern', 'expected':True},         # IPMI external bmc requires chardev attribute
    {'device':'isa-debugcon', 'expected':True},            # Can't create serial device, empty char device
    {'device':'isa-ipmi-bt', 'expected':True},             # IPMI device requires a bmc attribute to be set
    {'device':'isa-ipmi-kcs', 'expected':True},            # IPMI device requires a bmc attribute to be set
    {'device':'isa-parallel', 'expected':True},            # Can't create serial device, empty char device
    {'device':'ivshmem-doorbell', 'expected':True},        # You must specify a 'chardev'
    {'device':'ivshmem-plain', 'expected':True},           # You must specify a 'memdev'
    {'device':'loader', 'expected':True},                  # please include valid arguments
    {'device':'nand', 'expected':True},                    # Unsupported NAND block size 0x1
    {'device':'nvdimm', 'expected':True},                  # 'memdev' property is not set
    {'device':'nvme', 'expected':True},                    # Device initialization failed
    {'device':'pc-dimm', 'expected':True},                 # 'memdev' property is not set
    {'device':'pci-bridge', 'expected':True},              # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
    {'device':'pci-bridge-seat', 'expected':True},         # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
    {'device':'pxb', 'expected':True},                     # Bridge chassis not specified. Each bridge is required to be assigned a unique chassis id > 0.
    {'device':'scsi-block', 'expected':True},              # drive property not set
    {'device':'scsi-generic', 'expected':True},            # drive property not set
    {'device':'scsi-hd', 'expected':True},                 # drive property not set
    {'device':'spapr-pci-host-bridge', 'expected':True},   # BUID not specified for PHB
    {'device':'spapr-rng', 'expected':True},               # spapr-rng needs an RNG backend!
    {'device':'spapr-vty', 'expected':True},               # chardev property not set
    {'device':'tpm-tis', 'expected':True},                 # tpm_tis: backend driver with id (null) could not be found
    {'device':'unimplemented-device', 'expected':True},    # property 'size' not specified or zero
    {'device':'usb-braille', 'expected':True},             # Property chardev is required
    {'device':'usb-mtp', 'expected':True},                 # rootdir property must be configured
    {'device':'usb-redir', 'expected':True},               # Parameter 'chardev' is missing
    {'device':'usb-serial', 'expected':True},              # Property chardev is required
    {'device':'usb-storage', 'expected':True},             # drive property not set
    {'device':'vfio-amd-xgbe', 'expected':True},           # -device vfio-amd-xgbe: vfio error: wrong host device name
    {'device':'vfio-calxeda-xgmac', 'expected':True},      # -device vfio-calxeda-xgmac: vfio error: wrong host device name
    {'device':'vfio-pci', 'expected':True},                # No provided host device
    {'device':'vfio-pci-igd-lpc-bridge', 'expected':True}, # VFIO dummy ISA/LPC bridge must have address 1f.0
    {'device':'vhost-scsi.*', 'expected':True},            # vhost-scsi: missing wwpn
    {'device':'vhost-vsock-device', 'expected':True},      # guest-cid property must be greater than 2
    {'device':'vhost-vsock-pci', 'expected':True},         # guest-cid property must be greater than 2
    {'device':'virtio-9p-ccw', 'expected':True},           # 9pfs device couldn't find fsdev with the id = NULL
    {'device':'virtio-9p-device', 'expected':True},        # 9pfs device couldn't find fsdev with the id = NULL
    {'device':'virtio-9p-pci', 'expected':True},           # 9pfs device couldn't find fsdev with the id = NULL
    {'device':'virtio-blk-ccw', 'expected':True},          # drive property not set
    {'device':'virtio-blk-device', 'expected':True},       # drive property not set
    {'device':'virtio-blk-pci', 'expected':True},          # drive property not set
    {'device':'virtio-crypto-ccw', 'expected':True},       # 'cryptodev' parameter expects a valid object
    {'device':'virtio-crypto-device', 'expected':True},    # 'cryptodev' parameter expects a valid object
    {'device':'virtio-crypto-pci', 'expected':True},       # 'cryptodev' parameter expects a valid object
    {'device':'virtio-input-host-device', 'expected':True}, # evdev property is required
    {'device':'virtio-input-host-pci', 'expected':True},   # evdev property is required
    {'device':'xen-pvdevice', 'expected':True},            # Device ID invalid, it must always be supplied
    {'device':'vhost-vsock-ccw', 'expected':True},         # guest-cid property must be greater than 2
    {'device':'zpci', 'expected':True},                    # target must be defined
    {'device':'pnv-(occ|icp|lpc)', 'expected':True},       # required link 'xics' not found: Property '.xics' not found
    {'device':'powernv-cpu-.*', 'expected':True},          # pnv_core_realize: required link 'xics' not found: Property '.xics' not found

    # ioapic devices are already created by pc and will fail:
    {'machine':'q35|pc.*', 'device':'kvm-ioapic', 'expected':True}, # Only 1 ioapics allowed
    {'machine':'q35|pc.*', 'device':'ioapic', 'expected':True},     # Only 1 ioapics allowed

    # "spapr-cpu-core needs a pseries machine"
    {'machine':'(?!pseries).*', 'device':'.*-spapr-cpu-core', 'expected':True},

    # KVM-specific devices shouldn't be tried without accel=kvm:
    {'accel':'(?!kvm).*', 'device':'kvmclock', 'expected':True},

    # xen-specific machines and devices:
    {'accel':'(?!xen).*', 'machine':'xen.*', 'expected':True},
    {'accel':'(?!xen).*', 'device':'xen-.*', 'expected':True},

    # this fails on some machine-types, but not all, so they don't have expected=True:
    {'device':'vmgenid'}, # vmgenid requires DMA write support in fw_cfg, which this machine type does not provide

    # Silence INFO messages for errors that are common on multiple
    # devices/machines:
    {'log':r"No '[\w-]+' bus found for device '[\w-]+'"},
    {'log':r"images* must be given with the 'pflash' parameter"},
    {'log':r"(Guest|ROM|Flash|Kernel) image must be specified"},
    {'log':r"[cC]ould not load [\w ]+ (BIOS|bios) '[\w-]+\.bin'"},
    {'log':r"Couldn't find rom image '[\w-]+\.bin'"},
    {'log':r"speed mismatch trying to attach usb device"},
    {'log':r"Can't create a second ISA bus"},
    {'log':r"duplicate fw_cfg file name"},
    # sysbus-related error messages: most machines reject most dynamic sysbus devices:
    {'log':r"Option '-device [\w.,-]+' cannot be handled by this machine"},
    {'log':r"Device [\w.,-]+ is not supported by this machine yet"},
    {'log':r"Device [\w.,-]+ can not be dynamically instantiated"},
    {'log':r"Platform Bus: Can not fit MMIO region of size "},
    # other more specific errors we will ignore:
    {'device':'.*-spapr-cpu-core', 'log':r"CPU core type should be"},
    {'log':r"MSI(-X)? is not supported by interrupt controller"},
    {'log':r"pxb-pcie? devices cannot reside on a PCIe? bus"},
    {'log':r"Ignoring smp_cpus value"},
    {'log':r"sd_init failed: Drive 'sd0' is already in use because it has been automatically connected to another device"},
    {'log':r"This CPU requires a smaller page size than the system is using"},
    {'log':r"MSI-X support is mandatory in the S390 architecture"},
    {'log':r"rom check and register reset failed"},
    {'log':r"Unable to initialize GIC, CPUState for CPU#0 not valid"},
    {'log':r"Multiple VT220 operator consoles are not supported"},
    {'log':r"core 0 already populated"},
    {'log':r"could not find stage1 bootloader"},
    {'log':r"No '.*' bus found for device"},

    # other exitcode=1 failures not listed above will just generate INFO messages:
    {'exitcode':1, 'loglevel':logging.INFO},

    # everything else (including SIGABRT and SIGSEGV) will be a fatal error:
    {'exitcode':None, 'fatal':True, 'loglevel':logging.FATAL},
]
186
187
def errorRuleTestCaseMatch(rule, t):
    """Check if a test case specification can match an error rule

    This only checks if an error rule is a candidate match
    for a given test case, it won't check if the test case
    results/output match the rule.  See ruleListResultMatch().

    For each of the 'machine', 'accel' and 'device' keys, the rule is
    rejected only if the key is present in both `rule` and `t` and the
    rule's regexp does not match the *whole* test case value.  A key
    missing on either side never rejects the rule.

    Returns True if the rule is a candidate, False otherwise.
    """
    for key in ('machine', 'accel', 'device'):
        if key not in rule or key not in t:
            continue
        # re.fullmatch() is required here: appending '$' to the pattern
        # only anchors the last alternative of a top-level alternation
        # such as 'q35|pc.*', letting 'q35-anything' slip through.
        if not re.fullmatch(rule[key], t[key]):
            return False
    return True
204
205
def ruleListCandidates(t):
    """Lazily yield (index, rule) pairs for rules that may apply to test case t

    The index is the rule's position in ERROR_RULE_LIST; rules are
    produced in list order.
    """
    yield from ((idx, entry)
                for idx, entry in enumerate(ERROR_RULE_LIST)
                if errorRuleTestCaseMatch(entry, t))
211
212
def findExpectedResult(t):
    """Find the first candidate rule for t that is flagged expected=True

    Returns an (i, rule) tuple, where i is the index in
    ERROR_RULE_LIST and rule is the error rule itself, or None
    when no candidate rule has a true 'expected' value.
    """
    flagged = (pair for pair in ruleListCandidates(t)
               if pair[1].get('expected'))
    return next(flagged, None)
222
223
def ruleListResultMatch(rule, r):
    """Tell whether the result/output of a test case satisfies an error rule

    Only call this for rules where errorRuleTestCaseMatch() is True
    (e.g. the rules produced by ruleListCandidates()); this
    precondition is asserted.

    The result is truthy when the exit code is accepted by the rule
    (a missing 'exitcode' key means 1, None accepts anything) and the
    rule's 'log' regexp, if any, is found somewhere in the log.
    """
    assert errorRuleTestCaseMatch(rule, r['testcase'])

    wanted_exitcode = rule.get('exitcode', 1)
    if wanted_exitcode is not None and r['exitcode'] != wanted_exitcode:
        return False

    if 'log' not in rule:
        return True
    return re.search(rule['log'], r['log'], re.MULTILINE)
236
237
def checkResultRuleList(r):
    """Find the first error rule matching a test case result

    Returns an (i, rule) tuple, where i is the index in
    ERROR_RULE_LIST and rule is the error rule itself.

    Raises Exception if no rule matches at all.
    """
    candidates = ruleListCandidates(r['testcase'])
    hit = next((pair for pair in candidates
                if ruleListResultMatch(pair[1], r)), None)
    if hit is None:
        raise Exception("this should never happen")
    return hit
249
250
def qemuOptsEscape(s):
    """Return s with every comma doubled, as QemuOpts option values require"""
    return ",,".join(s.split(","))
254
255
def formatTestCase(t):
    """Render a test case dict as space-separated "key=value" pairs for logging"""
    pairs = ['%s=%s' % item for item in t.items()]
    return ' '.join(pairs)
259
260
def qomListTypeNames(vm, **kwargs):
    """Issue the qom-list-types QMP command and collect the 'name' of each entry

    Keyword arguments are passed through to the QMP command.
    """
    names = []
    for entry in vm.command('qom-list-types', **kwargs):
        names.append(entry['name'])
    return names
265
266
def infoQDM(vm):
    """Run 'info qdm' through the human monitor and yield one dict per device line

    Blank lines and lines ending in ':' are skipped.  Each yielded
    dict has a 'name' key (the quoted device name) and a 'no-user' key
    (True if the line contains ', no-user').
    """
    output = vm.command('human-monitor-command', **{'command-line': 'info qdm'})
    for raw in output.split('\n'):
        line = raw.strip()
        if not line or line.endswith(':'):
            continue
        name = re.search(r'name "([^"]+)"', line).group(1)
        yield {'name': name, 'no-user': ', no-user' in line}
278
279
class QemuBinaryInfo(object):
    """Information queried from a QEMU binary by launching it

    Attributes set by the constructor:

    * binary: the binary path/name passed in
    * alldevs: set of non-abstract type names implementing `devtype`
    * no_user_devs: set of device names flagged 'no-user' by 'info qdm'
    * user_devs: alldevs minus no_user_devs
    * machines: list of machine-type names from query-machines
    * kvm_available: the 'enabled' field of the query-kvm reply
    """

    def __init__(self, binary, devtype):
        """Launch `binary` with '-machine none' and query it over QMP

        `devtype` is the QOM type the listed devices must implement;
        None means 'device'.  The VM is always shut down before
        returning, even if one of the queries fails.
        """
        if devtype is None:
            devtype = 'device'

        self.binary = binary
        # machine name -> info dict, filled lazily by machineInfo()
        self._machine_info = {}

        dbg("devtype: %r", devtype)
        args = ['-S', '-machine', 'none,accel=kvm:tcg']
        dbg("querying info for QEMU binary: %s", binary)
        vm = QEMUMachine(binary=binary, args=args)
        vm.launch()
        try:
            self.alldevs = set(qomListTypeNames(vm, implements=devtype, abstract=False))
            # there's no way to query DeviceClass::user_creatable using QMP,
            # so use 'info qdm':
            self.no_user_devs = {d['name'] for d in infoQDM(vm) if d['no-user']}
            self.machines = [m['name'] for m in vm.command('query-machines')]
            self.user_devs = self.alldevs.difference(self.no_user_devs)
            self.kvm_available = vm.command('query-kvm')['enabled']
        finally:
            vm.shutdown()

    def machineInfo(self, machine):
        """Query for information on a specific machine-type

        Returns a dict with a single 'runnable' key: True if QEMU
        could be launched with "-machine <machine>", False if the
        launch raised an exception.

        Results are cached internally, in case the same machine-
        type is queried multiple times.
        """
        if machine in self._machine_info:
            return self._machine_info[machine]

        mi = {}
        args = ['-S', '-machine', '%s' % (machine)]
        dbg("querying machine info for binary=%s machine=%s", self.binary, machine)
        vm = QEMUMachine(binary=self.binary, args=args)
        try:
            vm.launch()
            mi['runnable'] = True
        except Exception:
            dbg("exception trying to run binary=%s machine=%s", self.binary, machine, exc_info=True)
            dbg("log: %r", vm.get_log())
            mi['runnable'] = False
        finally:
            # Shut down in a finally block so the VM is also cleaned up
            # when launch is interrupted by something that is not an
            # Exception subclass (e.g. KeyboardInterrupt).
            vm.shutdown()

        self._machine_info[machine] = mi
        return mi
328
329
# Cache of QemuBinaryInfo objects, keyed by binary path/name.
BINARY_INFO = {}


def getBinaryInfo(args, binary):
    """Return the QemuBinaryInfo for `binary`, creating and caching it on first use

    args.devtype is passed to QemuBinaryInfo when a new object
    has to be created.
    """
    try:
        return BINARY_INFO[binary]
    except KeyError:
        pass

    info = QemuBinaryInfo(binary, args.devtype)
    BINARY_INFO[binary] = info
    return info
337
338
def checkOneCase(args, testcase):
    """Launch QEMU once for the given test case and report any failure

    The test case must have 'binary', 'accel', 'machine' and 'device'
    keys.  QEMU is started with "-S -machine MACHINE,accel=ACCEL
    -device DEVICE" and shut down again right away.

    Returns None on success (launch raised no exception and the exit
    code is 0).  Otherwise returns a dict with the keys
    'exc_traceback', 'exitcode', 'log', 'testcase' and 'cmdline'.
    """
    binary, accel = testcase['binary'], testcase['accel']
    machine, device = testcase['machine'], testcase['device']

    dbg("will test: %r", testcase)

    machine_opt = '%s,accel=%s' % (machine, accel)
    qemu_args = ['-S', '-machine', machine_opt,
                 '-device', qemuOptsEscape(device)]
    cmdline = ' '.join([binary] + qemu_args)
    dbg("will launch QEMU: %s", cmdline)
    vm = QEMUMachine(binary=binary, args=qemu_args)

    launch_error = None
    try:
        vm.launch()
    except Exception:
        launch_error = traceback.format_exc()
        dbg("Exception while running test case")
    finally:
        # Always shut down, then collect exit code and log output
        vm.shutdown()
        ec = vm.exitcode()
        log = vm.get_log()

    if launch_error is None and ec == 0:
        return None

    return {'exc_traceback':launch_error,
            'exitcode':ec,
            'log':log,
            'testcase':testcase,
            'cmdline':cmdline}
375
376
def binariesToTest(args, testcase):
    """Return the list of QEMU binaries to test

    If binaries were given on the command line (args.qemu), that
    list is returned as is.  Otherwise the current directory is
    scanned for executable regular files named "qemu-system-*".
    `testcase` is not used.
    """
    if args.qemu:
        return args.qemu

    found = []
    for entry in os.scandir('.'):
        if not entry.name.startswith('qemu-system-'):
            continue
        if entry.is_file() and os.access(entry, os.X_OK):
            found.append(entry.path)
    return found
385
386
def accelsToTest(args, testcase):
    """Yield the accelerators to test: 'kvm' (only if the binary reports it) then 'tcg'"""
    info = getBinaryInfo(args, testcase['binary'])
    yield from (('kvm', 'tcg') if info.kvm_available else ('tcg',))
391
392
def machinesToTest(args, testcase):
    """Return the machine-type names known to the test case's binary"""
    info = getBinaryInfo(args, testcase['binary'])
    return info.machines
395
396
def devicesToTest(args, testcase):
    """Return the device types of the test case's binary that are not flagged no-user"""
    info = getBinaryInfo(args, testcase['binary'])
    return info.user_devs
399
400
# Test case variables, as (name, generator function) pairs.
# genCases() processes them in this order; each function is called as
# fn(args, testcase) and returns the candidate values for its variable.
# The later functions read testcase['binary'], so 'binary' must come first.
TESTCASE_VARIABLES = [
    ('binary', binariesToTest),
    ('accel', accelsToTest),
    ('machine', machinesToTest),
    ('device', devicesToTest),
]
407
408
def genCases1(args, testcases, var, fn):
    """Expand each test case over the possible values of one variable

    A test case that already has `var` set is passed through
    unchanged (as a copy) instead of being expanded; this is what
    lets the "-t" command-line option pin a specific value.
    Otherwise one copy of the test case is yielded for every value
    returned by fn(args, testcase), with `var` set to that value.
    """
    for base in testcases:
        if var in base:
            yield base.copy()
            continue
        for value in fn(args, base):
            yield {**base, var: value}
425
426
def genCases(args, testcase):
    """Expand a partial test case over every variable in TESTCASE_VARIABLES

    Returns an iterable of complete test cases; variables already
    present in `testcase` keep their value.
    """
    expanded = [testcase.copy()]
    for name, values_fn in TESTCASE_VARIABLES:
        dbg("var: %r, fn: %r", name, values_fn)
        # each step wraps the previous iterable in a new generator
        expanded = genCases1(args, expanded, name, values_fn)
    return expanded
435
436
def casesToTest(args, testcase):
    """Build the final collection of test cases to run

    Starts from genCases() and then, in this order: keeps only a
    random sample of at most args.random cases (if non-zero), logs
    the number of cases (if args.debug), and shuffles them (if
    args.shuffle).
    """
    selected = genCases(args, testcase)
    if args.random:
        pool = list(selected)
        selected = random.sample(pool, min(args.random, len(pool)))
    if args.debug:
        # counting requires materializing the generator
        selected = list(selected)
        dbg("%d test cases to test", len(selected))
    if args.shuffle:
        selected = list(selected)
        random.shuffle(selected)
    return selected
449
450
def logFailure(f, level):
    """Log every detail of failure dict f at the given log level

    Logs the test case, the command line, each line of the QEMU
    log, the exit code and, if there is one, the exception traceback.
    """
    testcase = f['testcase']
    logger.log(level, "failed: %s", formatTestCase(testcase))
    logger.log(level, "cmdline: %s", f['cmdline'])
    for log_line in f['log'].strip().split('\n'):
        logger.log(level, "log: %s", log_line)
    logger.log(level, "exit code: %r", f['exitcode'])

    tb = f['exc_traceback']
    if not tb:
        return
    logger.log(level, "exception:")
    for tb_line in tb.split('\n'):
        logger.log(level, "  %s", tb_line)
462
463
def main():
    """Command-line entry point

    Parses the command line, generates the test cases and runs them
    one by one, classifying each failure using ERROR_RULE_LIST.

    Returns 1 if no QEMU binary was found or if any fatal failure
    was recorded; returns None (exit status 0) otherwise.  A malformed
    "-t" value makes argparse exit with a usage error.
    """
    parser = argparse.ArgumentParser(description="QEMU -device crash test")
    parser.add_argument('-t', metavar='KEY=VALUE', nargs='*',
                        help="Limit test cases to KEY=VALUE",
                        action='append', dest='testcases', default=[])
    parser.add_argument('-d', '--debug', action='store_true',
                        help='debug output')
    parser.add_argument('-v', '--verbose', action='store_true', default=True,
                        help='verbose output')
    parser.add_argument('-q', '--quiet', dest='verbose', action='store_false',
                        help='non-verbose output')
    parser.add_argument('-r', '--random', type=int, metavar='COUNT',
                        help='run a random sample of COUNT test cases',
                        default=0)
    parser.add_argument('--shuffle', action='store_true',
                        help='Run test cases in random order')
    parser.add_argument('--dry-run', action='store_true',
                        help="Don't run any tests, just generate list")
    parser.add_argument('-D', '--devtype', metavar='TYPE',
                        help="Test only device types that implement TYPE")
    parser.add_argument('-Q', '--quick', action='store_true', default=True,
                        help="Quick mode: skip test cases that are expected to fail")
    parser.add_argument('-F', '--full', action='store_false', dest='quick',
                        help="Full mode: test cases that are expected to fail")
    parser.add_argument('--strict', action='store_true', dest='strict',
                        help="Treat all warnings as fatal")
    parser.add_argument('qemu', nargs='*', metavar='QEMU',
                        help='QEMU binary to run')
    args = parser.parse_args()

    if args.debug:
        lvl = logging.DEBUG
    elif args.verbose:
        lvl = logging.INFO
    else:
        lvl = logging.WARN
    logging.basicConfig(stream=sys.stdout, level=lvl, format='%(levelname)s: %(message)s')

    fatal_failures = []
    # index in ERROR_RULE_LIST -> list of failures matched by that rule
    wl_stats = {}
    skipped = 0
    total = 0

    # Partial test case built from all the "-t KEY=VALUE" options.
    # args.testcases is a list of lists (action='append' + nargs='*').
    tc = {}
    dbg("testcases: %r", args.testcases)
    if args.testcases:
        for t in chain(*args.testcases):
            for kv in t.split():
                if '=' not in kv:
                    # report a usage error instead of a ValueError traceback
                    parser.error("invalid test case specification %r, "
                                 "expected KEY=VALUE" % (kv))
                k, v = kv.split('=', 1)
                tc[k] = v

    if not binariesToTest(args, tc):
        print("No QEMU binary found", file=sys.stderr)
        parser.print_usage(sys.stderr)
        return 1

    for t in casesToTest(args, tc):
        logger.info("running test case: %s", formatTestCase(t))
        total += 1

        expected_match = findExpectedResult(t)
        # Quick mode skips cases that are expected to fail, and cases
        # whose machine-type can't even be launched without a device.
        if (args.quick and
                (expected_match or
                 not getBinaryInfo(args, t['binary']).machineInfo(t['machine'])['runnable'])):
            dbg("skipped: %s", formatTestCase(t))
            skipped += 1
            continue

        if args.dry_run:
            continue

        try:
            f = checkOneCase(args, t)
        except KeyboardInterrupt:
            # stop running test cases, but still print the summary below
            break

        if f:
            i, rule = checkResultRuleList(f)
            dbg("testcase: %r, rule list match: %r", t, rule)
            wl_stats.setdefault(i, []).append(f)
            level = rule.get('loglevel', logging.DEBUG)
            logFailure(f, level)
            if rule.get('fatal') or (args.strict and level >= logging.WARN):
                fatal_failures.append(f)
        else:
            dbg("success: %s", formatTestCase(t))
            if expected_match:
                # Logger.warn() is a deprecated alias of Logger.warning()
                logger.warning("Didn't fail as expected: %s", formatTestCase(t))

    logger.info("Total: %d test cases", total)
    if skipped:
        logger.info("Skipped %d test cases", skipped)

    if args.debug:
        # number of failures matched by each rule, least-used rules first
        stats = sorted([(len(wl_stats.get(i, [])), rule) for i, rule in
                         enumerate(ERROR_RULE_LIST)], key=lambda x: x[0])
        for count, rule in stats:
            dbg("error rule stats: %d: %r", count, rule)

    if fatal_failures:
        for f in fatal_failures:
            t = f['testcase']
            logger.error("Fatal failure: %s", formatTestCase(t))
        logger.error("Fatal failures on some machine/device combinations")
        return 1

if __name__ == '__main__':
    sys.exit(main())
572