1#!/usr/bin/env python3
2# SPDX-License-Identifier: GPL-2.0-only
3#
4# top-like utility for displaying kvm statistics
5#
6# Copyright 2006-2008 Qumranet Technologies
7# Copyright 2008-2011 Red Hat, Inc.
8#
9# Authors:
10#  Avi Kivity <avi@redhat.com>
11#
12"""The kvm_stat module outputs statistics about running KVM VMs
13
14Three different ways of output formatting are available:
15- as a top-like text ui
16- in a key -> value format
17- in an all keys, all values format
18
19The data is sampled from the KVM's debugfs entries and its perf events.
20"""
21from __future__ import print_function
22
23import curses
24import sys
25import locale
26import os
27import time
28import argparse
29import ctypes
30import fcntl
31import resource
32import struct
33import re
34import subprocess
35import signal
36from collections import defaultdict, namedtuple
37from functools import reduce
38from datetime import datetime
39
# VMX exit reasons: symbolic name -> numeric exit reason code.
# Arch.get_arch() hands this table to ArchX86 when /proc/cpuinfo lists the
# 'vmx' flag; TracepointProvider then uses the codes as filter values for
# the 'kvm_exit' tracepoint.
VMX_EXIT_REASONS = {
    'EXCEPTION_NMI':        0,
    'EXTERNAL_INTERRUPT':   1,
    'TRIPLE_FAULT':         2,
    'PENDING_INTERRUPT':    7,
    'NMI_WINDOW':           8,
    'TASK_SWITCH':          9,
    'CPUID':                10,
    'HLT':                  12,
    'INVLPG':               14,
    'RDPMC':                15,
    'RDTSC':                16,
    'VMCALL':               18,
    'VMCLEAR':              19,
    'VMLAUNCH':             20,
    'VMPTRLD':              21,
    'VMPTRST':              22,
    'VMREAD':               23,
    'VMRESUME':             24,
    'VMWRITE':              25,
    'VMOFF':                26,
    'VMON':                 27,
    'CR_ACCESS':            28,
    'DR_ACCESS':            29,
    'IO_INSTRUCTION':       30,
    'MSR_READ':             31,
    'MSR_WRITE':            32,
    'INVALID_STATE':        33,
    'MWAIT_INSTRUCTION':    36,
    'MONITOR_INSTRUCTION':  39,
    'PAUSE_INSTRUCTION':    40,
    'MCE_DURING_VMENTRY':   41,
    'TPR_BELOW_THRESHOLD':  43,
    'APIC_ACCESS':          44,
    'EPT_VIOLATION':        48,
    'EPT_MISCONFIG':        49,
    'WBINVD':               54,
    'XSETBV':               55,
    'APIC_WRITE':           56,
    'INVPCID':              58,
}
81
# SVM exit reasons: symbolic name -> numeric exit code.
# Arch.get_arch() hands this table to ArchX86 when /proc/cpuinfo lists the
# 'svm' flag (and not 'vmx'); the codes serve as filter values for the
# 'kvm_exit' tracepoint.
SVM_EXIT_REASONS = {
    'READ_CR0':       0x000,
    'READ_CR3':       0x003,
    'READ_CR4':       0x004,
    'READ_CR8':       0x008,
    'WRITE_CR0':      0x010,
    'WRITE_CR3':      0x013,
    'WRITE_CR4':      0x014,
    'WRITE_CR8':      0x018,
    'READ_DR0':       0x020,
    'READ_DR1':       0x021,
    'READ_DR2':       0x022,
    'READ_DR3':       0x023,
    'READ_DR4':       0x024,
    'READ_DR5':       0x025,
    'READ_DR6':       0x026,
    'READ_DR7':       0x027,
    'WRITE_DR0':      0x030,
    'WRITE_DR1':      0x031,
    'WRITE_DR2':      0x032,
    'WRITE_DR3':      0x033,
    'WRITE_DR4':      0x034,
    'WRITE_DR5':      0x035,
    'WRITE_DR6':      0x036,
    'WRITE_DR7':      0x037,
    'EXCP_BASE':      0x040,
    'INTR':           0x060,
    'NMI':            0x061,
    'SMI':            0x062,
    'INIT':           0x063,
    'VINTR':          0x064,
    'CR0_SEL_WRITE':  0x065,
    'IDTR_READ':      0x066,
    'GDTR_READ':      0x067,
    'LDTR_READ':      0x068,
    'TR_READ':        0x069,
    'IDTR_WRITE':     0x06a,
    'GDTR_WRITE':     0x06b,
    'LDTR_WRITE':     0x06c,
    'TR_WRITE':       0x06d,
    'RDTSC':          0x06e,
    'RDPMC':          0x06f,
    'PUSHF':          0x070,
    'POPF':           0x071,
    'CPUID':          0x072,
    'RSM':            0x073,
    'IRET':           0x074,
    'SWINT':          0x075,
    'INVD':           0x076,
    'PAUSE':          0x077,
    'HLT':            0x078,
    'INVLPG':         0x079,
    'INVLPGA':        0x07a,
    'IOIO':           0x07b,
    'MSR':            0x07c,
    'TASK_SWITCH':    0x07d,
    'FERR_FREEZE':    0x07e,
    'SHUTDOWN':       0x07f,
    'VMRUN':          0x080,
    'VMMCALL':        0x081,
    'VMLOAD':         0x082,
    'VMSAVE':         0x083,
    'STGI':           0x084,
    'CLGI':           0x085,
    'SKINIT':         0x086,
    'RDTSCP':         0x087,
    'ICEBP':          0x088,
    'WBINVD':         0x089,
    'MONITOR':        0x08a,
    'MWAIT':          0x08b,
    'MWAIT_COND':     0x08c,
    'XSETBV':         0x08d,
    'NPF':            0x400,
}
156
# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
# Symbolic name -> numeric code. ArchA64 exposes this table as its
# exit_reasons and filters the 'kvm_exit' tracepoint on the 'esr_ec' field.
AARCH64_EXIT_REASONS = {
    'UNKNOWN':      0x00,
    'WFI':          0x01,
    'CP15_32':      0x03,
    'CP15_64':      0x04,
    'CP14_MR':      0x05,
    'CP14_LS':      0x06,
    'FP_ASIMD':     0x07,
    'CP10_ID':      0x08,
    'CP14_64':      0x0C,
    'ILL_ISS':      0x0E,
    'SVC32':        0x11,
    'HVC32':        0x12,
    'SMC32':        0x13,
    'SVC64':        0x15,
    'HVC64':        0x16,
    'SMC64':        0x17,
    'SYS64':        0x18,
    'IABT':         0x20,
    'IABT_HYP':     0x21,
    'PC_ALIGN':     0x22,
    'DABT':         0x24,
    'DABT_HYP':     0x25,
    'SP_ALIGN':     0x26,
    'FP_EXC32':     0x28,
    'FP_EXC64':     0x2C,
    'SERROR':       0x2F,
    'BREAKPT':      0x30,
    'BREAKPT_HYP':  0x31,
    'SOFTSTP':      0x32,
    'SOFTSTP_HYP':  0x33,
    'WATCHPT':      0x34,
    'WATCHPT_HYP':  0x35,
    'BKPT32':       0x38,
    'VECTOR32':     0x3A,
    'BRK64':        0x3C,
}
195
# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
# Symbolic name -> numeric code. TracepointProvider always registers this
# table as the filter values for the 'reason' field of the
# 'kvm_userspace_exit' tracepoint, independent of the architecture.
USERSPACE_EXIT_REASONS = {
    'UNKNOWN':          0,
    'EXCEPTION':        1,
    'IO':               2,
    'HYPERCALL':        3,
    'DEBUG':            4,
    'HLT':              5,
    'MMIO':             6,
    'IRQ_WINDOW_OPEN':  7,
    'SHUTDOWN':         8,
    'FAIL_ENTRY':       9,
    'INTR':             10,
    'SET_TPR':          11,
    'TPR_ACCESS':       12,
    'S390_SIEIC':       13,
    'S390_RESET':       14,
    'DCR':              15,
    'NMI':              16,
    'INTERNAL_ERROR':   17,
    'OSI':              18,
    'PAPR_HCALL':       19,
    'S390_UCONTROL':    20,
    'WATCHDOG':         21,
    'S390_TSCH':        22,
    'EPR':              23,
    'SYSTEM_EVENT':     24,
}
224
# Default ioctl request numbers issued on perf event file descriptors by
# Event (set filter / enable / disable / reset). Every Arch subclass
# exposes a table with these keys as its 'ioctl_numbers' attribute.
IOCTL_NUMBERS = {
    'SET_FILTER':  0x40082406,
    'ENABLE':      0x00002400,
    'DISABLE':     0x00002401,
    'RESET':       0x00002403,
}
231
# Module-level flag; not read in this part of the file.
# NOTE(review): presumably flipped by a signal handler defined further
# down - confirm against the rest of the file.
signal_received = False

# Encoding reported by the locale, queried without changing the locale.
ENCODING = locale.getpreferredencoding(False)
# Matches only strings that contain no '(' at all, i.e. plain tracepoint
# names without a '(filter value)' suffix.
TRACE_FILTER = re.compile(r'^[^\(]*$')
236
237
class Arch(object):
    """Encapsulates global architecture specific data.

    Contains the performance event open syscall and ioctl numbers, as
    well as the VM exit reasons for the architecture it runs on.

    """
    @staticmethod
    def get_arch():
        """Return the Arch subclass instance for the running machine.

        The machine name from os.uname() selects PPC, AArch64 or s390.
        Everything else is treated as x86, where the first 'flags' line
        of /proc/cpuinfo decides between the VMX and the SVM exit reason
        table.

        Returns None when that first 'flags' line names neither 'vmx'
        nor 'svm', or when /proc/cpuinfo has no 'flags' line at all.
        """
        machine = os.uname()[4]

        if machine.startswith('ppc'):
            return ArchPPC()
        if machine.startswith('aarch64'):
            return ArchA64()
        if machine.startswith('s390'):
            return ArchS390()

        # X86_64
        # The context manager closes /proc/cpuinfo on every return path.
        with open('/proc/cpuinfo') as cpuinfo:
            for line in cpuinfo:
                if not line.startswith('flags'):
                    continue

                flags = line.split()
                if 'vmx' in flags:
                    return ArchX86(VMX_EXIT_REASONS)
                if 'svm' in flags:
                    return ArchX86(SVM_EXIT_REASONS)
                # Only the first 'flags' line is inspected.
                return None
        return None

    def tracepoint_is_child(self, field):
        """Return the parent tracepoint name of 'field', or None.

        A child field has the format 'event name(filter name)'; its
        parent is everything before the first '('. Fields without a
        '(' are not children.
        """
        if '(' not in field:
            return None
        return field.split('(', 1)[0]
272
273
class ArchX86(Arch):
    """x86 flavour of Arch; the exit reason table is chosen by the caller."""
    def __init__(self, exit_reasons):
        # Name -> code table used to filter the 'kvm_exit' tracepoint on
        # its 'exit_reason' field.
        self.exit_reasons = exit_reasons
        self.exit_reason_field = 'exit_reason'
        self.ioctl_numbers = IOCTL_NUMBERS
        # Syscall number Event passes to libc's syscall().
        self.sc_perf_evt_open = 298

    def debugfs_is_child(self, field):
        """ Returns name of parent if 'field' is a child, None otherwise

        No debugfs field is treated as a child on this architecture.
        """
        return None
284
285
class ArchPPC(Arch):
    """PPC flavour of Arch with its own ioctl request numbers."""
    def __init__(self):
        # Syscall number Event passes to libc's syscall().
        self.sc_perf_evt_open = 319
        # Work on a private copy: assigning the overrides below into the
        # module-level IOCTL_NUMBERS dict would change it for every other
        # user of that table.
        self.ioctl_numbers = dict(IOCTL_NUMBERS)
        self.ioctl_numbers['ENABLE'] = 0x20002400
        self.ioctl_numbers['DISABLE'] = 0x20002401
        self.ioctl_numbers['RESET'] = 0x20002403

        # PPC comes in 32 and 64 bit and some generated ioctl
        # numbers depend on the wordsize.
        char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
        self.ioctl_numbers['SET_FILTER'] = (0x80002406 |
                                            (char_ptr_size << 16))
        self.exit_reason_field = 'exit_nr'
        # No exit reason names are known here, so TracepointProvider
        # creates no per-reason 'kvm_exit' filters.
        self.exit_reasons = {}

    def debugfs_is_child(self, field):
        """ Returns name of parent if 'field' is a child, None otherwise

        No debugfs field is treated as a child on this architecture.
        """
        return None
304
305
class ArchA64(Arch):
    """AArch64 flavour of Arch."""
    def __init__(self):
        # The 'kvm_exit' tracepoint is filtered on 'esr_ec' using the
        # AArch64 exception class table.
        self.exit_reasons = AARCH64_EXIT_REASONS
        self.exit_reason_field = 'esr_ec'
        self.ioctl_numbers = IOCTL_NUMBERS
        # Syscall number Event passes to libc's syscall().
        self.sc_perf_evt_open = 241

    def debugfs_is_child(self, field):
        """ Returns name of parent if 'field' is a child, None otherwise

        No debugfs field is treated as a child on this architecture.
        """
        return None
316
317
class ArchS390(Arch):
    """s390 flavour of Arch; it defines no exit reason filter."""
    def __init__(self):
        # Neither a field nor a table: TracepointProvider therefore
        # registers no 'kvm_exit' filter.
        self.exit_reasons = None
        self.exit_reason_field = None
        self.ioctl_numbers = IOCTL_NUMBERS
        # Syscall number Event passes to libc's syscall().
        self.sc_perf_evt_open = 331

    def debugfs_is_child(self, field):
        """ Returns name of parent if 'field' is a child, None otherwise

        Every field whose name starts with 'instruction_' is a child of
        'exit_instruction'.
        """
        if not field.startswith('instruction_'):
            return None
        return 'exit_instruction'
329
330
# Architecture description shared by Event and the providers, detected
# once at import time. Arch.get_arch() can return None on x86 when the
# CPU flags name neither 'vmx' nor 'svm'.
ARCH = Arch.get_arch()
332
333
class perf_event_attr(ctypes.Structure):
    """Struct that holds the necessary data to set up a trace event.

    For an extensive explanation see perf_event_open(2) and
    include/uapi/linux/perf_event.h, struct perf_event_attr

    All fields that are not initialized in the constructor are 0.

    """
    _fields_ = [('type', ctypes.c_uint32),
                ('size', ctypes.c_uint32),
                ('config', ctypes.c_uint64),
                ('sample_freq', ctypes.c_uint64),
                ('sample_type', ctypes.c_uint64),
                ('read_format', ctypes.c_uint64),
                ('flags', ctypes.c_uint64),
                ('wakeup_events', ctypes.c_uint32),
                ('bp_type', ctypes.c_uint32),
                ('bp_addr', ctypes.c_uint64),
                ('bp_len', ctypes.c_uint64),
                ]

    def __init__(self):
        """Set up a tracepoint event attribute that is read as a group."""
        # Name the class explicitly: super(self.__class__, self) resolves
        # to this very __init__ again for any subclass instance and so
        # recurses without end.
        super(perf_event_attr, self).__init__()
        self.type = PERF_TYPE_TRACEPOINT
        self.size = ctypes.sizeof(self)
        self.read_format = PERF_FORMAT_GROUP


# Values stored in perf_event_attr.type and perf_event_attr.read_format.
PERF_TYPE_TRACEPOINT = 2
PERF_FORMAT_GROUP = 1 << 3
365
366
class Group(object):
    """Represents a perf event group."""

    def __init__(self):
        # Events in the order they were added; events[0] is read from.
        self.events = []

    def add_event(self, event):
        """Append 'event' to the group."""
        self.events.append(event)

    def read(self):
        """Returns a dict with 'event name: value' for all events in the
        group.

        A single read on the file descriptor of the first event (the
        group leader) delivers the counters of all events. See
        perf_event_open(2) for details.

        Read format for the used event configuration is:
        struct read_format {
            u64 nr; /* The number of events */
            struct {
                u64 value; /* The value of the event */
            } values[nr];
        };

        """
        count = len(self.events)
        names = [event.name for event in self.events]
        # One u64 for 'nr' plus one u64 per event.
        raw = os.read(self.events[0].fd, 8 * (1 + count))
        # The eight pad bytes skip 'nr'; each 'Q' is one event value.
        values = struct.unpack('xxxxxxxx' + 'Q' * count, raw)
        return dict(zip(names, values))
398
399
class Event(object):
    """Represents a performance event and manages its life cycle."""
    def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
                 trace_filter, trace_set='kvm'):
        """Open the perf event for 'trace_point' in 'trace_set'.

        'group' is the Group the event will belong to; if it already
        holds events, its first event becomes this event's group leader.
        'trace_cpu' and 'trace_pid' are passed to the syscall unchanged
        and 'trace_filter' is an optional filter string.

        Raises OSError if the perf event cannot be opened.
        """
        self.libc = ctypes.CDLL('libc.so.6', use_errno=True)
        self.syscall = self.libc.syscall
        self.name = name
        self.fd = None
        self._setup_event(group, trace_cpu, trace_pid, trace_point,
                          trace_filter, trace_set)

    def __del__(self):
        """Closes the event's file descriptor.

        As no python file object was created for the file descriptor,
        python will not reference count the descriptor and will not
        close it itself automatically, so we do it.

        """
        # __del__ also runs for objects whose __init__ raised before 'fd'
        # was assigned, hence getattr. Compare against None so that a
        # descriptor numbered 0 is closed as well.
        fd = getattr(self, 'fd', None)
        if fd is not None:
            os.close(fd)

    def _perf_event_open(self, attr, pid, cpu, group_fd, flags):
        """Wrapper for the sys_perf_evt_open() syscall.

        Used to set up performance events, returns a file descriptor or -1
        on error.

        Attributes are:
        - syscall number
        - struct perf_event_attr *
        - pid or -1 to monitor all pids
        - cpu number or -1 to monitor all cpus
        - The file descriptor of the group leader or -1 to create a group.
        - flags

        """
        return self.syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr),
                            ctypes.c_int(pid), ctypes.c_int(cpu),
                            ctypes.c_int(group_fd), ctypes.c_long(flags))

    def _setup_event_attribute(self, trace_set, trace_point):
        """Returns an initialized ctype perf_event_attr struct."""

        id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
                               trace_point, 'id')

        event_attr = perf_event_attr()
        # Close the id file right away; one is opened for every event.
        with open(id_path) as id_file:
            event_attr.config = int(id_file.read())
        return event_attr

    def _setup_event(self, group, trace_cpu, trace_pid, trace_point,
                     trace_filter, trace_set):
        """Sets up the perf event in Linux.

        Issues the syscall to register the event in the kernel and
        then sets the optional filter.

        Raises OSError if the syscall reports an error.

        """

        event_attr = self._setup_event_attribute(trace_set, trace_point)

        # First event will be group leader.
        group_leader = -1

        # All others have to pass the leader's descriptor instead.
        if group.events:
            group_leader = group.events[0].fd

        fd = self._perf_event_open(event_attr, trace_pid,
                                   trace_cpu, group_leader, 0)
        if fd == -1:
            err = ctypes.get_errno()
            raise OSError(err, os.strerror(err),
                          'while calling sys_perf_event_open().')

        # Record the descriptor before applying the filter, so __del__
        # still closes it if the ioctl below raises.
        self.fd = fd

        if trace_filter:
            fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
                        trace_filter)

    def enable(self):
        """Enables the trace event in the kernel.

        Enabling the group leader makes reading counters from it and the
        events under it possible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)

    def disable(self):
        """Disables the trace event in the kernel.

        Disabling the group leader makes reading all counters under it
        impossible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)

    def reset(self):
        """Resets the count of the trace event in the kernel."""
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
503
504
class Provider(object):
    """Encapsulates functionalities used by all providers."""
    def __init__(self, pid):
        """Store 'pid' and start with child events switched off."""
        self.child_events = False
        # Subclasses define 'pid' as a property, so this assignment may
        # run their setter.
        self.pid = pid

    @staticmethod
    def is_field_wanted(fields_filter, field):
        """Indicate whether field is valid according to fields_filter.

        An empty or missing filter accepts every field; otherwise the
        filter is a regular expression matched at the start of 'field'.
        """
        if fields_filter:
            return re.match(fields_filter, field) is not None
        return True

    @staticmethod
    def walkdir(path):
        """Returns os.walk() data for specified directory.

        Only the first entry os.walk() yields is returned, i.e. the
        3-tuple of (dirpath, dirnames, filenames) for 'path' itself.
        """
        walker = os.walk(path)
        return next(walker)
526
527
class TracepointProvider(Provider):
    """Data provider for the stats class.

    Manages the events/groups from which it acquires its data.

    """
    def __init__(self, pid, fields_filter):
        """Collect the wanted fields, then open traces for 'pid'.

        The order matters: 'group_leaders' and 'filters' are needed by
        update_fields(), and the fields must be known before the base
        class assigns 'pid', which sets up the traces.
        """
        self.group_leaders = []
        self.filters = self._get_filters()
        self.update_fields(fields_filter)
        super(TracepointProvider, self).__init__(pid)

    @staticmethod
    def _get_filters():
        """Returns a dict of trace events, their filter ids and
        the values that can be filtered.

        Trace events can be filtered for special values by setting a
        filter string via an ioctl. The string normally has the format
        identifier==value. For each filter a new event will be created, to
        be able to distinguish the events.

        """
        filters = {}
        filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS)
        if ARCH.exit_reason_field and ARCH.exit_reasons:
            filters['kvm_exit'] = (ARCH.exit_reason_field, ARCH.exit_reasons)
        return filters

    def _get_available_fields(self):
        """Returns a list of available events of format 'event name(filter
        name)'.

        All available events have directories under
        /sys/kernel/debug/tracing/events/ which export information
        about the specific event. Therefore, listing the dirs gives us
        a list of all available events.

        Some events like the vm exit reasons can be filtered for
        specific values. To take account for that, the routine below
        creates special fields with the following format:
        event name(filter name)

        """
        path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
        fields = self.walkdir(path)[1]
        extra = []
        for field in fields:
            if field in self.filters:
                filter_dicts = self.filters[field][1]
                for name in filter_dicts:
                    extra.append(field + '(' + name + ')')
        fields += extra
        return fields

    def update_fields(self, fields_filter):
        """Refresh fields, applying fields_filter"""
        fields = [field for field in self._get_available_fields()
                  if self.is_field_wanted(fields_filter, field)]
        # add parents for child fields - otherwise we won't see any output!
        # Iterate over a snapshot, as parents are appended to 'fields'.
        for field in list(fields):
            parent = ARCH.tracepoint_is_child(field)
            if parent and parent not in fields:
                fields.append(parent)
        # Assign only once the list is complete: the setter enables and
        # disables events by name, so a parent appended afterwards would
        # be left disabled.
        self.fields = fields

    @staticmethod
    def _get_online_cpus():
        """Returns a list of cpu id integers."""
        def parse_int_list(list_string):
            """Returns an int list from a string of comma separated integers and
            integer ranges."""
            integers = []
            members = list_string.split(',')

            for member in members:
                if '-' not in member:
                    integers.append(int(member))
                else:
                    int_range = member.split('-')
                    integers.extend(range(int(int_range[0]),
                                          int(int_range[1]) + 1))

            return integers

        with open('/sys/devices/system/cpu/online') as cpu_list:
            cpu_string = cpu_list.readline()
            return parse_int_list(cpu_string)

    def _setup_traces(self):
        """Creates all event and group objects needed to be able to retrieve
        data."""
        fields = self._get_available_fields()
        if self._pid > 0:
            # Fetch list of all threads of the monitored pid, as qemu
            # starts a thread for each vcpu.
            path = os.path.join('/proc', str(self._pid), 'task')
            groupids = self.walkdir(path)[1]
        else:
            groupids = self._get_online_cpus()

        # The constant is needed as a buffer for python libs, std
        # streams and other files that the script opens.
        newlim = len(groupids) * len(fields) + 50
        try:
            softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)

            if hardlim < newlim:
                # Now we need CAP_SYS_RESOURCE, to increase the hard limit.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim))
            else:
                # Raising the soft limit is sufficient.
                resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim))

        except ValueError:
            sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))

        for groupid in groupids:
            group = Group()
            for name in fields:
                tracepoint = name
                tracefilter = None
                # Fields of the form 'event(value)' become a filtered
                # event on 'event'.
                match = re.match(r'(.*)\((.*)\)', name)
                if match:
                    tracepoint, sub = match.groups()
                    tracefilter = ('%s==%d\0' %
                                   (self.filters[tracepoint][0],
                                    self.filters[tracepoint][1][sub]))

                # From perf_event_open(2):
                # pid > 0 and cpu == -1
                # This measures the specified process/thread on any CPU.
                #
                # pid == -1 and cpu >= 0
                # This measures all processes/threads on the specified CPU.
                trace_cpu = groupid if self._pid == 0 else -1
                trace_pid = int(groupid) if self._pid != 0 else -1

                group.add_event(Event(name=name,
                                      group=group,
                                      trace_cpu=trace_cpu,
                                      trace_pid=trace_pid,
                                      trace_point=tracepoint,
                                      trace_filter=tracefilter))

            self.group_leaders.append(group)

    @property
    def fields(self):
        """The list of currently wanted field names."""
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Enables/disables the (un)wanted events"""
        self._fields = fields
        for group in self.group_leaders:
            for index, event in enumerate(group.events):
                if event.name in fields:
                    event.reset()
                    event.enable()
                else:
                    # Do not disable the group leader.
                    # It would disable all of its events.
                    if index != 0:
                        event.disable()

    @property
    def pid(self):
        """The monitored pid."""
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Changes the monitored pid by setting new traces."""
        self._pid = pid
        # The garbage collector will get rid of all Event/Group
        # objects and open files after removing the references.
        self.group_leaders = []
        self._setup_traces()
        # Re-assign to run the setter on the freshly created events.
        self.fields = self._fields

    def read(self, by_guest=0):
        """Returns 'event name: current value' for all enabled events.

        Values of the same event are summed up over all groups. Child
        events are reported under the key '<child> <parent>'. The
        'by_guest' argument is accepted but not used here.
        """
        ret = defaultdict(int)
        for group in self.group_leaders:
            for name, val in group.read().items():
                if name not in self._fields:
                    continue
                parent = ARCH.tracepoint_is_child(name)
                if parent:
                    name += ' ' + parent
                ret[name] += val
        return ret

    def reset(self):
        """Reset all field counters"""
        for group in self.group_leaders:
            for event in group.events:
                event.reset()
725
726
class DebugfsProvider(Provider):
    """Provides data from the files that KVM creates in the kvm debugfs
    folder."""
    def __init__(self, pid, fields_filter, include_past):
        """Collect the wanted fields and select the VMs of 'pid'.

        With 'include_past' set, the baseline is set to 0 so that the
        counts accumulated before the start are reported as well.
        """
        self.update_fields(fields_filter)
        self._baseline = {}
        self.do_read = True
        self.paths = []
        super(DebugfsProvider, self).__init__(pid)
        if include_past:
            self._restore()

    def _get_available_fields(self):
        """Returns a list of available fields.

        The fields are all available KVM debugfs files

        """
        exempt_list = ['halt_poll_fail_ns', 'halt_poll_success_ns']
        fields = [field for field in self.walkdir(PATH_DEBUGFS_KVM)[2]
                  if field not in exempt_list]

        return fields

    def update_fields(self, fields_filter):
        """Refresh fields, applying fields_filter"""
        fields = [field for field in self._get_available_fields()
                  if self.is_field_wanted(fields_filter, field)]
        # add parents for child fields - otherwise we won't see any output!
        # Iterate over a snapshot, as parents are appended to 'fields'.
        for field in list(fields):
            parent = ARCH.debugfs_is_child(field)
            if parent and parent not in fields:
                fields.append(parent)
        # Assigned directly rather than through the 'fields' setter: the
        # setter calls reset(), which needs attributes that __init__ has
        # not created yet when it calls this method.
        self._fields = fields

    @property
    def fields(self):
        """The list of currently wanted field names."""
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Replace the wanted fields and reset the counters."""
        self._fields = fields
        self.reset()

    @property
    def pid(self):
        """The monitored pid, 0 meaning all VMs."""
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Select the debugfs directories that belong to 'pid'."""
        self._pid = pid
        if pid != 0:
            vms = self.walkdir(PATH_DEBUGFS_KVM)[1]
            if not vms:
                self.do_read = False

            # Directory names start with '<pid>-' (read() takes the part
            # before the first '-' as the pid). Match the prefix only, so
            # pid 12 does not also select '112-...'.
            prefix = "{}-".format(pid)
            self.paths = [vm for vm in vms if vm.startswith(prefix)]

        else:
            self.paths = []
            self.do_read = True

    def _verify_paths(self):
        """Remove invalid paths"""
        # Build the filtered list first: removing entries from a list
        # while iterating over it skips the entry after each removal.
        self.paths[:] = [path for path in self.paths
                         if os.path.exists(os.path.join(PATH_DEBUGFS_KVM,
                                                        path))]

    def read(self, reset=0, by_guest=0):
        """Returns a dict with format:'file name / field -> current value'.

        Parameter 'reset':
          0   plain read
          1   reset field counts to 0
          2   restore the original field counts

        With 'by_guest' set, values of fields that are not children are
        summed up under the pid part of the directory name instead of
        the field name.

        """
        results = {}

        # If no debugfs filtering support is available, then don't read.
        if not self.do_read:
            return results
        self._verify_paths()

        paths = self.paths
        if self._pid == 0:
            paths = []
            for entry in os.walk(PATH_DEBUGFS_KVM):
                paths.extend(entry[1])
        for path in paths:
            for field in self._fields:
                value = self._read_field(field, path)
                key = path + field
                if reset == 1:
                    self._baseline[key] = value
                if reset == 2:
                    self._baseline[key] = 0
                # First sighting of this key: start counting from now.
                if self._baseline.get(key, -1) == -1:
                    self._baseline[key] = value
                parent = ARCH.debugfs_is_child(field)
                if parent:
                    field = field + ' ' + parent
                else:
                    if by_guest:
                        field = key.split('-')[0]    # set 'field' to 'pid'
                increment = value - self._baseline.get(key, 0)
                if field in results:
                    results[field] += increment
                else:
                    results[field] = increment

        return results

    def _read_field(self, field, path):
        """Returns the value of a single field from a specific VM.

        Returns 0 if the file cannot be opened or read.
        """
        try:
            # The context manager closes the file again; this runs for
            # every field of every VM on each read.
            with open(os.path.join(PATH_DEBUGFS_KVM, path, field)) as stat:
                return int(stat.read())
        except IOError:
            return 0

    def reset(self):
        """Reset field counters"""
        self._baseline = {}
        self.read(1)

    def _restore(self):
        """Restore the original field counts by setting the baseline to 0"""
        self._baseline = {}
        self.read(2)
860
861
# Per-key record stored in Stats.values: the latest value read and the
# difference to the value stored before it.
EventStat = namedtuple('EventStat', ['value', 'delta'])
863
864
865class Stats(object):
866    """Manages the data providers and the data they provide.
867
868    It is used to set filters on the provider's data and collect all
869    provider data.
870
871    """
872    def __init__(self, options):
873        self.providers = self._get_providers(options)
874        self._pid_filter = options.pid
875        self._fields_filter = options.fields
876        self.values = {}
877        self._child_events = False
878
879    def _get_providers(self, options):
880        """Returns a list of data providers depending on the passed options."""
881        providers = []
882
883        if options.debugfs:
884            providers.append(DebugfsProvider(options.pid, options.fields,
885                                             options.debugfs_include_past))
886        if options.tracepoints or not providers:
887            providers.append(TracepointProvider(options.pid, options.fields))
888
889        return providers
890
891    def _update_provider_filters(self):
892        """Propagates fields filters to providers."""
893        # As we reset the counters when updating the fields we can
894        # also clear the cache of old values.
895        self.values = {}
896        for provider in self.providers:
897            provider.update_fields(self._fields_filter)
898
899    def reset(self):
900        self.values = {}
901        for provider in self.providers:
902            provider.reset()
903
904    @property
905    def fields_filter(self):
906        return self._fields_filter
907
908    @fields_filter.setter
909    def fields_filter(self, fields_filter):
910        if fields_filter != self._fields_filter:
911            self._fields_filter = fields_filter
912            self._update_provider_filters()
913
914    @property
915    def pid_filter(self):
916        return self._pid_filter
917
918    @pid_filter.setter
919    def pid_filter(self, pid):
920        if pid != self._pid_filter:
921            self._pid_filter = pid
922            self.values = {}
923            for provider in self.providers:
924                provider.pid = self._pid_filter
925
926    @property
927    def child_events(self):
928        return self._child_events
929
930    @child_events.setter
931    def child_events(self, val):
932        self._child_events = val
933        for provider in self.providers:
934            provider.child_events = val
935
936    def get(self, by_guest=0):
937        """Returns a dict with field -> (value, delta to last value) of all
938        provider data.
939        Key formats:
940          * plain: 'key' is event name
941          * child-parent: 'key' is in format '<child> <parent>'
942          * pid: 'key' is the pid of the guest, and the record contains the
943               aggregated event data
944        These formats are generated by the providers, and handled in class TUI.
945        """
946        for provider in self.providers:
947            new = provider.read(by_guest=by_guest)
948            for key in new:
949                oldval = self.values.get(key, EventStat(0, 0)).value
950                newval = new.get(key, 0)
951                newdelta = newval - oldval
952                self.values[key] = EventStat(newval, newdelta)
953        return self.values
954
955    def toggle_display_guests(self, to_pid):
956        """Toggle between collection of stats by individual event and by
957        guest pid
958
959        Events reported by DebugfsProvider change when switching to/from
960        reading by guest values. Hence we have to remove the excess event
961        names from self.values.
962
963        """
964        if any(isinstance(ins, TracepointProvider) for ins in self.providers):
965            return 1
966        if to_pid:
967            for provider in self.providers:
968                if isinstance(provider, DebugfsProvider):
969                    for key in provider.fields:
970                        if key in self.values.keys():
971                            del self.values[key]
972        else:
973            oldvals = self.values.copy()
974            for key in oldvals:
975                if key.isdigit():
976                    del self.values[key]
977        # Update oldval (see get())
978        self.get(to_pid)
979        return 0
980
981
# Default refresh delay in seconds (default of option -s, and the value used
# when the interactive delay prompt is left empty).
DELAY_DEFAULT = 3.0
# Guest names longer than this are truncated in the Tui headline.
MAX_GUEST_NAME_LEN = 48
# Regex filters longer than this are truncated in the Tui header.
MAX_REGEX_LEN = 44
# Initial Tui sorting mode: sort by (delta, value).  Any other mode sorts by
# the overall value only.
SORT_DEFAULT = 0
# Accepted range for the refresh delay in seconds (see is_delay_valid()).
# NOTE(review): the upper bound presumably stems from curses.halfdelay(),
# which the Tui calls with the delay in tenths of a second - confirm.
MIN_DELAY = 0.1
MAX_DELAY = 25.5
988
989
class Tui(object):
    """Instruments curses to draw a nice text ui.

    Meant to be used as a context manager: entering initialises curses,
    leaving restores the terminal.  show_stats() runs the interactive
    refresh/input loop.
    """
    def __init__(self, stats, opts):
        """Stores the stats object and takes the regular refresh delay from
        opts.set_delay."""
        self.stats = stats
        self.screen = None
        self._delay_initial = 0.25
        self._delay_regular = opts.set_delay
        self._sorting = SORT_DEFAULT
        self._display_guests = 0
        # Both are set by _refresh_header(); they are initialised here so
        # that methods reading them never hit an AttributeError.
        self._gname = ''
        self._headline = ''

    def __enter__(self):
        """Initialises curses for later use.  Based on curses.wrapper
           implementation from the Python standard library."""
        self.screen = curses.initscr()
        curses.noecho()
        curses.cbreak()

        # The try/catch works around a minor bit of
        # over-conscientiousness in the curses module, the error
        # return from C start_color() is ignorable.
        try:
            curses.start_color()
        except curses.error:
            pass

        # Hide cursor in extra statement as some monochrome terminals
        # might support hiding but not colors.
        try:
            curses.curs_set(0)
        except curses.error:
            pass

        curses.use_default_colors()
        return self

    def __exit__(self, *exception):
        """Resets the terminal to its normal state.  Based on curses.wrapper
           implementation from the Python standard library."""
        if self.screen:
            self.screen.keypad(0)
            curses.echo()
            curses.nocbreak()
            curses.endwin()

    @staticmethod
    def get_all_gnames():
        """Returns a list of (pid, gname) tuples of all running guests

        The pids are returned as strings, as printed by 'ps'.
        Raises an Exception if 'ps' cannot be started.
        """
        res = []
        try:
            child = subprocess.Popen(['ps', '-A', '--format', 'pid,args'],
                                     stdout=subprocess.PIPE)
        except OSError as err:
            # callers only expect a generic Exception on processing errors
            raise Exception('Could not run ps: {}'.format(err)) from err
        try:
            for line in child.stdout:
                line = line.decode(ENCODING).lstrip().split(' ', 1)
                # perform a sanity check before calling the more expensive
                # function to possibly extract the guest name
                if len(line) > 1 and ' -name ' in line[1]:
                    res.append((line[0], Tui.get_gname_from_pid(line[0])))
        finally:
            child.stdout.close()
            # reap the child, otherwise it lingers as a zombie process
            child.wait()

        return res

    def _print_all_gnames(self, row):
        """Print a list of all running guests along with their pids."""
        self.screen.addstr(row, 2, '%8s  %-60s' %
                           ('Pid', 'Guest Name (fuzzy list, might be '
                            'inaccurate!)'),
                           curses.A_UNDERLINE)
        row += 1
        try:
            for line in self.get_all_gnames():
                self.screen.addstr(row, 2, '%8s  %-60s' % (line[0], line[1]))
                row += 1
                if row >= self.screen.getmaxyx()[0]:
                    break
        except Exception:
            self.screen.addstr(row + 1, 2, 'Not available')

    @staticmethod
    def get_pid_from_gname(gname):
        """Fuzzy function to convert guest name to QEMU process pid.

        Returns a list of potential pids, can be empty if no match found.
        Throws an exception on processing errors.

        """
        return [int(pid) for pid, name in Tui.get_all_gnames()
                if gname == name]

    @staticmethod
    def get_gname_from_pid(pid):
        """Returns the guest name for a QEMU process pid.

        Extracts the guest name from the QEMU command line by processing the
        '-name' option. Will also handle names specified out of sequence.
        Returns '' if the command line cannot be read or holds no name.

        """
        name = ''
        try:
            with open('/proc/{}/cmdline'.format(pid), 'r') as cmdline:
                line = cmdline.read().split('\0')
            parms = line[line.index('-name') + 1].split(',')
            while '' in parms:
                # commas are escaped (i.e. ',,'), hence e.g. 'foo,bar' results
                # in # ['foo', '', 'bar'], which we revert here
                idx = parms.index('')
                parms[idx - 1] += ',' + parms[idx + 1]
                del parms[idx:idx+2]
            # the '-name' switch allows for two ways to specify the guest name,
            # where the plain name overrides the name specified via 'guest='
            for arg in parms:
                if '=' not in arg:
                    name = arg
                    break
                if arg[:6] == 'guest=':
                    name = arg[6:]
        except (ValueError, IOError, IndexError):
            pass

        return name

    def _update_pid(self, pid):
        """Propagates pid selection to stats object."""
        self.screen.addstr(4, 1, 'Updating pid filter...')
        self.screen.refresh()
        self.stats.pid_filter = pid

    def _refresh_header(self, pid=None):
        """Refreshes the header.

        Uses the pid filter of the stats object if no pid is passed.
        """
        if pid is None:
            pid = self.stats.pid_filter
        self.screen.erase()
        gname = self.get_gname_from_pid(pid)
        self._gname = gname
        if gname:
            gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...'
                                   if len(gname) > MAX_GUEST_NAME_LEN
                                   else gname))
        if pid > 0:
            self._headline = 'kvm statistics - pid {0} {1}'.format(pid, gname)
        else:
            self._headline = 'kvm statistics - summary'
        self.screen.addstr(0, 0, self._headline, curses.A_BOLD)
        if self.stats.fields_filter:
            regex = self.stats.fields_filter
            if len(regex) > MAX_REGEX_LEN:
                regex = regex[:MAX_REGEX_LEN] + '...'
            self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex))
        if self._display_guests:
            col_name = 'Guest Name'
        else:
            col_name = 'Event'
        self.screen.addstr(2, 1, '%-40s %10s%7s %8s' %
                           (col_name, 'Total', '%Total', 'CurAvg/s'),
                           curses.A_STANDOUT)
        self.screen.addstr(4, 1, 'Collecting data...')
        self.screen.refresh()

    def _refresh_body(self, sleeptime):
        """Redraws the statistics table below the header.

        sleeptime is the time in seconds since the previous refresh; the
        deltas are divided by it to get per-second rates.
        """
        def insert_child(sorted_items, child, values, parent):
            num = len(sorted_items)
            for i in range(0, num):
                # only add child if parent is present
                if parent.startswith(sorted_items[i][0]):
                    sorted_items.insert(i + 1, ('  ' + child, values))

        def get_sorted_events(self, stats):
            """ separate parent and child events """
            if self._sorting == SORT_DEFAULT:
                def sortkey(pair):
                    # sort by (delta value, overall value)
                    v = pair[1]
                    return (v.delta, v.value)
            else:
                def sortkey(pair):
                    # sort by overall value
                    v = pair[1]
                    return v.value

            childs = []
            sorted_items = []
            # we can't rule out child events to appear prior to parents even
            # when sorted - separate out all children first, and add in later
            for key, values in sorted(stats.items(), key=sortkey,
                                      reverse=True):
                if values == (0, 0):
                    continue
                if ' ' in key:
                    if not self.stats.child_events:
                        continue
                    childs.insert(0, (key, values))
                else:
                    sorted_items.append((key, values))
            if self.stats.child_events:
                for key, values in childs:
                    (child, parent) = key.split(' ')
                    insert_child(sorted_items, child, values, parent)

            return sorted_items

        if not self._is_running_guest(self.stats.pid_filter):
            if self._gname:
                try:  # ...to identify the guest by name in case it's back
                    pids = self.get_pid_from_gname(self._gname)
                    if len(pids) == 1:
                        self._refresh_header(pids[0])
                        self._update_pid(pids[0])
                        return
                except Exception:
                    # best effort only, but do not swallow KeyboardInterrupt
                    # or SystemExit as a bare except would
                    pass
            self._display_guest_dead()
            # leave final data on screen
            return
        row = 3
        self.screen.move(row, 0)
        self.screen.clrtobot()
        stats = self.stats.get(self._display_guests)
        total = 0.
        ctotal = 0.
        for key, values in stats.items():
            if self._display_guests:
                if self.get_gname_from_pid(key):
                    total += values.value
                continue
            if ' ' not in key:
                total += values.value
            else:
                ctotal += values.value
        if total == 0.:
            # we don't have any fields, or all non-child events are filtered
            total = ctotal

        # print events
        tavg = 0
        tcur = 0
        # total of the most recent parent event, used as the 100% mark for
        # the child events listed below it
        ptotal = 0
        guest_removed = False
        for key, values in get_sorted_events(self, stats):
            if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0):
                break
            if self._display_guests:
                key = self.get_gname_from_pid(key)
                if not key:
                    continue
            cur = int(round(values.delta / sleeptime)) if values.delta else 0
            if cur < 0:
                guest_removed = True
                continue
            if key[0] != ' ':
                if values.delta:
                    tcur += values.delta
                ptotal = values.value
                ltotal = total
            else:
                ltotal = ptotal
            # guard against a zero reference total instead of crashing
            percent = values.value * 100 / float(ltotal) if ltotal else 0.
            self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key,
                               values.value, percent, cur))
            row += 1
        if row == 3:
            if guest_removed:
                self.screen.addstr(4, 1, 'Guest removed, updating...')
            else:
                self.screen.addstr(4, 1, 'No matching events reported yet')
        if row > 4:
            tavg = int(round(tcur / sleeptime)) if tcur > 0 else ''
            self.screen.addstr(row, 1, '%-40s %10d        %8s' %
                               ('Total', total, tavg), curses.A_BOLD)
        self.screen.refresh()

    def _display_guest_dead(self):
        """Marks the guest as dead next to the headline."""
        marker = '   Guest is DEAD   '
        col = min(len(self._headline), 80 - len(marker))
        self.screen.addstr(0, col, marker, curses.A_BLINK | curses.A_STANDOUT)

    def _show_msg(self, text):
        """Display message centered text and exit on key press

        text is a list of lines.
        """
        hint = 'Press any key to continue'
        curses.cbreak()
        self.screen.erase()
        (_, term_width) = self.screen.getmaxyx()
        row = 2
        for line in text:
            # never pass a negative column if the line is wider than the
            # terminal
            start = max(0, (term_width - len(line)) // 2)
            self.screen.addstr(row, start, line)
            row += 1
        self.screen.addstr(row + 1, max(0, (term_width - len(hint)) // 2),
                           hint, curses.A_STANDOUT)
        self.screen.getkey()

    def _show_help_interactive(self):
        """Display help with list of interactive commands"""
        msg = ('   b     toggle events by guests (debugfs only, honors'
               ' filters)',
               '   c     clear filter',
               '   f     filter by regular expression',
               '   g     filter by guest name/PID',
               '   h     display interactive commands reference',
               '   o     toggle sorting order (Total vs CurAvg/s)',
               '   p     filter by guest name/PID',
               '   q     quit',
               '   r     reset stats',
               '   s     set delay between refreshs (value range: '
               '%s-%s secs)' % (MIN_DELAY, MAX_DELAY),
               '   x     toggle reporting of stats for individual child trace'
               ' events',
               'Any other key refreshes statistics immediately')
        curses.cbreak()
        self.screen.erase()
        self.screen.addstr(0, 0, "Interactive commands reference",
                           curses.A_BOLD)
        self.screen.addstr(2, 0, "Press any key to exit", curses.A_STANDOUT)
        row = 4
        for line in msg:
            self.screen.addstr(row, 0, line)
            row += 1
        self.screen.getkey()
        self._refresh_header()

    def _show_filter_selection(self):
        """Draws filter selection mask.

        Asks for a valid regex and sets the fields filter accordingly.

        """
        msg = ''
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               "Show statistics for events matching a regex.",
                               curses.A_BOLD)
            self.screen.addstr(2, 0,
                               "Current regex: {0}"
                               .format(self.stats.fields_filter))
            self.screen.addstr(5, 0, msg)
            self.screen.addstr(3, 0, "New regex: ")
            curses.echo()
            regex = self.screen.getstr().decode(ENCODING)
            curses.noecho()
            if not regex:
                # empty input clears the filter
                self.stats.fields_filter = ''
                self._refresh_header()
                return
            try:
                re.compile(regex)
                self.stats.fields_filter = regex
                self._refresh_header()
                return
            except re.error:
                msg = '"' + regex + '": Not a valid regular expression'
                continue

    def _show_set_update_interval(self):
        """Draws update interval selection mask."""
        msg = ''
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).'
                               % DELAY_DEFAULT, curses.A_BOLD)
            self.screen.addstr(4, 0, msg)
            self.screen.addstr(2, 0, 'Change delay from %.1fs to ' %
                               self._delay_regular)
            curses.echo()
            val = self.screen.getstr().decode(ENCODING)
            curses.noecho()

            try:
                if len(val) > 0:
                    delay = float(val)
                    err = is_delay_valid(delay)
                    if err is not None:
                        msg = err
                        continue
                else:
                    # empty input selects the default
                    delay = DELAY_DEFAULT
                self._delay_regular = delay
                break

            except ValueError:
                msg = '"' + str(val) + '": Invalid value'
        self._refresh_header()

    def _is_running_guest(self, pid):
        """Check if pid is still a running process.

        A pid of 0 (i.e. no pid filter) always counts as running.
        """
        if not pid:
            return True
        return os.path.isdir(os.path.join('/proc/', str(pid)))

    def _show_vm_selection_by_guest(self):
        """Draws guest selection mask.

        Asks for a guest name or pid until a valid guest name or '' is entered.

        """
        msg = ''
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               'Show statistics for specific guest or pid.',
                               curses.A_BOLD)
            self.screen.addstr(1, 0,
                               'This might limit the shown data to the trace '
                               'statistics.')
            self.screen.addstr(5, 0, msg)
            self._print_all_gnames(7)
            curses.echo()
            curses.curs_set(1)
            self.screen.addstr(3, 0, "Guest or pid [ENTER exits]: ")
            guest = self.screen.getstr().decode(ENCODING)
            curses.noecho()

            pid = 0
            if not guest or guest == '0':
                break
            if guest.isdigit():
                if not self._is_running_guest(guest):
                    msg = '"' + guest + '": Not a running process'
                    continue
                pid = int(guest)
                break
            pids = []
            try:
                pids = self.get_pid_from_gname(guest)
            except Exception:
                msg = '"' + guest + '": Internal error while searching, ' \
                      'use pid filter instead'
                continue
            if len(pids) == 0:
                msg = '"' + guest + '": Not an active guest'
                continue
            if len(pids) > 1:
                msg = '"' + guest + '": Multiple matches found, use pid ' \
                      'filter instead'
                continue
            pid = pids[0]
            break
        curses.curs_set(0)
        self._refresh_header(pid)
        self._update_pid(pid)

    def show_stats(self):
        """Refreshes the screen and processes user input.

        Runs until 'q' is pressed or a KeyboardInterrupt is received.
        """
        sleeptime = self._delay_initial
        self._refresh_header()
        start = 0.0  # result based on init value never appears on screen
        while True:
            self._refresh_body(time.time() - start)
            curses.halfdelay(int(sleeptime * 10))
            start = time.time()
            sleeptime = self._delay_regular
            try:
                # raises curses.error if no key was pressed within the delay
                char = self.screen.getkey()
                if char == 'b':
                    self._display_guests = not self._display_guests
                    if self.stats.toggle_display_guests(self._display_guests):
                        self._show_msg(['Command not available with '
                                        'tracepoints enabled', 'Restart with '
                                        'debugfs only (see option \'-d\') and '
                                        'try again!'])
                        self._display_guests = not self._display_guests
                    self._refresh_header()
                elif char == 'c':
                    self.stats.fields_filter = ''
                    self._refresh_header(0)
                    self._update_pid(0)
                elif char == 'f':
                    curses.curs_set(1)
                    self._show_filter_selection()
                    curses.curs_set(0)
                    sleeptime = self._delay_initial
                elif char == 'g' or char == 'p':
                    self._show_vm_selection_by_guest()
                    sleeptime = self._delay_initial
                elif char == 'h':
                    self._show_help_interactive()
                elif char == 'o':
                    self._sorting = not self._sorting
                elif char == 'q':
                    break
                elif char == 'r':
                    self.stats.reset()
                elif char == 's':
                    curses.curs_set(1)
                    self._show_set_update_interval()
                    curses.curs_set(0)
                    sleeptime = self._delay_initial
                elif char == 'x':
                    self.stats.child_events = not self.stats.child_events
            except KeyboardInterrupt:
                break
            except curses.error:
                continue
1486
1487
def batch(stats):
    """Prints statistics in a key, value format.

    Samples the stats twice, one second apart, and prints one line per key
    with its value and the delta between the two samples.  Ends silently on
    a KeyboardInterrupt.
    """
    try:
        # The first sample only serves as the baseline for the deltas.
        stats.get()
        time.sleep(1)
        snapshot = stats.get()
        for key in sorted(snapshot):
            record = snapshot[key]
            name = key.split(' ')[0]
            print('%-42s%10d%10d' % (name, record.value, record.delta))
    except KeyboardInterrupt:
        pass
1499
1500
class StdFormat(object):
    """Log format with blank separated columns."""
    def __init__(self, keys):
        # Only the part of each key before the first blank goes into the
        # banner; every name is followed by a single blank.
        names = [key.split(' ')[0] for key in keys]
        self._banner = ''.join(name + ' ' for name in names)

    def get_banner(self):
        """Returns the header line."""
        return self._banner

    def get_statline(self, keys, s):
        """Returns the deltas of all keys, each right-aligned in a column."""
        return ''.join(' %9d' % s[key].delta for key in keys)
1515
1516
class CSVFormat(object):
    """Log format with comma separated columns, led by a timestamp column."""
    def __init__(self, keys):
        # Only the part of each key before the first blank names a column.
        columns = ['timestamp']
        columns.extend(str(key.split(' ')[0]) for key in keys)
        self._banner = ','.join(columns)

    def get_banner(self):
        """Returns the header line."""
        return self._banner

    def get_statline(self, keys, s):
        """Returns the deltas of all keys, each preceded by a comma."""
        return ''.join(',{!s}'.format(s[key].delta) for key in keys)
1529
1530
def log(stats, opts, frmt, keys):
    """Prints statistics as reiterating key block, multiple value blocks.

    Every opts.set_delay seconds one line with the deltas of keys is
    written, formatted by frmt, to opts.log_to_file if set and to stdout
    otherwise.  When the global signal_received flag is set, the log file is
    closed and reopened.  Runs until a KeyboardInterrupt is received.
    """
    global signal_received
    line = 0
    banner_repeat = 20
    f = None

    def do_banner(opts):
        """Opens the log file if necessary and prints the banner."""
        nonlocal f
        if opts.log_to_file:
            if not f:
                try:
                    f = open(opts.log_to_file, 'a')
                except (IOError, OSError):
                    sys.exit("Error: Could not open file: %s" %
                             opts.log_to_file)
                # a csv file that already has content got its banner before
                if isinstance(frmt, CSVFormat) and f.tell() != 0:
                    return
        print(frmt.get_banner(), file=f or sys.stdout)

    def do_statline(opts, values):
        """Prints one timestamped line of values."""
        statline = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + \
            frmt.get_statline(keys, values)
        print(statline, file=f or sys.stdout)

    do_banner(opts)
    banner_printed = True
    while True:
        try:
            time.sleep(opts.set_delay)
            if signal_received:
                banner_printed = True
                line = 0
                if f:
                    f.close()
                    # A closed file object is still truthy, so the reference
                    # has to be dropped for do_banner() to reopen the file.
                    f = None
                do_banner(opts)
                signal_received = False
            if (line % banner_repeat == 0 and not banner_printed and
                    not (opts.log_to_file and isinstance(frmt, CSVFormat))):
                do_banner(opts)
                banner_printed = True
            values = stats.get()
            if (not opts.skip_zero_records or
                    any(values[k].delta != 0 for k in keys)):
                do_statline(opts, values)
                line += 1
                banner_printed = False
        except KeyboardInterrupt:
            break

    if f:
        f.close()
1582
1583
def handle_signal(sig, frame):
    """Signal handler that merely flags the arrival of a signal.

    The flag is evaluated and cleared in log().
    """
    global signal_received
    signal_received = True
1590
1591
def is_delay_valid(delay):
    """Verify delay is in valid value range.

    Returns None if delay lies within MIN_DELAY and MAX_DELAY (inclusive),
    and an error message otherwise.
    """
    if delay < MIN_DELAY:
        return '"' + str(delay) + '": Delay must be >=%s' % MIN_DELAY
    if delay > MAX_DELAY:
        return '"' + str(delay) + '": Delay must be <=%s' % MAX_DELAY
    if delay != delay:
        # NaN compares False against both bounds and would otherwise be
        # accepted, only to fail later when converted to an integer.
        return '"' + str(delay) + '": Invalid value'
    return None
1600
1601
def get_options():
    """Returns processed program arguments.

    Parses the command line and exits with an error message on
    inconsistent options, on an invalid fields regex, or if a guest name
    passed via -g/--guest cannot be resolved to exactly one pid.
    """
    description_text = """
This script displays various statistics about VMs running under KVM.
The statistics are gathered from the KVM debugfs entries and / or the
currently available perf traces.

The monitoring takes additional cpu cycles and might affect the VM's
performance.

Requirements:
- Access to:
    %s
    %s/events/*
    /proc/pid/task
- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
  CAP_SYS_ADMIN and perf events are used.
- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
  the large number of files that are possibly opened.

Interactive Commands:
   b     toggle events by guests (debugfs only, honors filters)
   c     clear filter
   f     filter by regular expression
   g     filter by guest name
   h     display interactive commands reference
   o     toggle sorting order (Total vs CurAvg/s)
   p     filter by PID
   q     quit
   r     reset stats
   s     set update interval (value range: 0.1-25.5 secs)
   x     toggle reporting of stats for individual child trace events
Press any other key to refresh statistics immediately.
""" % (PATH_DEBUGFS_KVM, PATH_DEBUGFS_TRACING)

    class Guest_to_pid(argparse.Action):
        """Resolves the guest name passed via -g/--guest and stores the
        matching pid in namespace.pid."""
        def __call__(self, parser, namespace, values, option_string=None):
            try:
                pids = Tui.get_pid_from_gname(values)
            except Exception:
                sys.exit('Error while searching for guest "{}". Use "-p" to '
                         'specify a pid instead?'.format(values))
            if len(pids) == 0:
                sys.exit('Error: No guest by the name "{}" found'
                         .format(values))
            if len(pids) > 1:
                # the pids are integers and need converting for join()
                sys.exit('Error: Multiple processes found (pids: {}). Use "-p"'
                         ' to specify the desired pid'
                         .format(" ".join(str(pid) for pid in pids)))
            namespace.pid = pids[0]

    argparser = argparse.ArgumentParser(description=description_text,
                                        formatter_class=argparse
                                        .RawTextHelpFormatter)
    argparser.add_argument('-1', '--once', '--batch',
                           action='store_true',
                           default=False,
                           help='run in batch mode for one second',
                           )
    argparser.add_argument('-c', '--csv',
                           action='store_true',
                           default=False,
                           help='log in csv format - requires option -l/-L',
                           )
    argparser.add_argument('-d', '--debugfs',
                           action='store_true',
                           default=False,
                           help='retrieve statistics from debugfs',
                           )
    argparser.add_argument('-f', '--fields',
                           default='',
                           help='''fields to display (regex)
"-f help" for a list of available events''',
                           )
    argparser.add_argument('-g', '--guest',
                           type=str,
                           help='restrict statistics to guest by name',
                           action=Guest_to_pid,
                           )
    argparser.add_argument('-i', '--debugfs-include-past',
                           action='store_true',
                           default=False,
                           help='include all available data on past events for'
                                ' debugfs',
                           )
    argparser.add_argument('-l', '--log',
                           action='store_true',
                           default=False,
                           help='run in logging mode (like vmstat)',
                           )
    argparser.add_argument('-L', '--log-to-file',
                           type=str,
                           metavar='FILE',
                           help="like '--log', but logging to a file"
                           )
    argparser.add_argument('-p', '--pid',
                           type=int,
                           default=0,
                           help='restrict statistics to pid',
                           )
    argparser.add_argument('-s', '--set-delay',
                           type=float,
                           default=DELAY_DEFAULT,
                           metavar='DELAY',
                           help='set delay between refreshs (value range: '
                                '%s-%s secs)' % (MIN_DELAY, MAX_DELAY),
                           )
    argparser.add_argument('-t', '--tracepoints',
                           action='store_true',
                           default=False,
                           help='retrieve statistics from tracepoints',
                           )
    argparser.add_argument('-z', '--skip-zero-records',
                           action='store_true',
                           default=False,
                           help='omit records with all zeros in logging mode',
                           )
    options = argparser.parse_args()
    if options.csv and not (options.log or options.log_to_file):
        sys.exit('Error: Option -c/--csv requires -l/--log')
    if options.skip_zero_records and not (options.log or options.log_to_file):
        sys.exit('Error: Option -z/--skip-zero-records requires -l/-L')
    try:
        # verify that we were passed a valid regex up front
        re.compile(options.fields)
    except re.error:
        sys.exit('Error: "' + options.fields + '" is not a valid regular '
                 'expression')

    return options
1731
1732
def check_access(options):
    """Check that the tracing directory is available, else fall back.

    Nothing happens if the tracing path exists, or if debugfs statistics
    were selected without tracepoints.  Otherwise a hint is written to
    stderr; when tracepoints were explicitly requested the program exits
    with status 1, else debugfs statistics are switched on after a five
    second pause.

    Returns the (possibly modified) options object.
    """
    if os.path.exists(PATH_DEBUGFS_TRACING):
        return options
    if not options.tracepoints and options.debugfs:
        # Tracepoints are not needed, so a missing tracing dir is harmless.
        return options

    hint = ("Please enable CONFIG_TRACING in your kernel "
            "when using the option -t (default).\n"
            "If it is enabled, make {0} readable by the "
            "current user.\n")
    sys.stderr.write(hint.format(PATH_DEBUGFS_TRACING))

    # An explicit request for tracepoints cannot be satisfied: give up.
    if options.tracepoints:
        sys.exit(1)

    sys.stderr.write("Falling back to debugfs statistics!\n")
    options.debugfs = True
    # Pause so the messages above can be read before execution continues.
    time.sleep(5)
    return options
1750
1751
def assign_globals():
    """Locate the debugfs mount and set the KVM and tracing path globals.

    Scans /proc/mounts for a debugfs entry and derives PATH_DEBUGFS_KVM and
    PATH_DEBUGFS_TRACING from its mount point.

    Exits with status 1, after printing a hint to stderr, if no debugfs
    mount is found or if the 'kvm' directory below it does not exist.
    """
    global PATH_DEBUGFS_KVM
    global PATH_DEBUGFS_TRACING

    debugfs = ''
    # The context manager closes the file even though the loop is left early.
    with open('/proc/mounts') as mounts:
        for line in mounts:
            # Fields: device, mount point, filesystem type, options, ...
            fields = line.split(' ')
            if len(fields) < 3:
                continue
            # Match the conventional device name as before, and also the
            # filesystem type, so that a mount created with
            # 'mount -t debugfs none <dir>' is recognized as well.
            if 'debugfs' in (fields[0], fields[2]):
                debugfs = fields[1]
                break
    if not debugfs:
        sys.stderr.write("Please make sure that CONFIG_DEBUG_FS is enabled in "
                         "your kernel, mounted and\nreadable by the current "
                         "user:\n"
                         "('mount -t debugfs debugfs /sys/kernel/debug')\n")
        sys.exit(1)

    PATH_DEBUGFS_KVM = os.path.join(debugfs, 'kvm')
    PATH_DEBUGFS_TRACING = os.path.join(debugfs, 'tracing')

    if not os.path.exists(PATH_DEBUGFS_KVM):
        sys.stderr.write("Please make sure that CONFIG_KVM is enabled in "
                         "your kernel and that the modules are loaded.\n")
        sys.exit(1)
1775
1776
def main():
    """Set up globals and options, then run the selected output mode.

    After validating the pid and the refresh delay, one of the following
    happens:
    - '-f help': print the sorted list of available event names and exit
    - logging (to stdout or to a file): run log() with a CSV or standard
      formatter
    - once: run batch()
    - otherwise: show the interactive text UI
    """
    assign_globals()
    options = check_access(get_options())

    if options.pid > 0:
        proc_entry = os.path.join('/proc/', str(options.pid))
        if not os.path.isdir(proc_entry):
            sys.stderr.write('Did you use a (unsupported) tid instead of a '
                             'pid?\n')
            sys.exit('Specified pid does not exist.')

    delay_error = is_delay_valid(options.set_delay)
    if delay_error is not None:
        sys.exit('Error: ' + delay_error)

    stats = Stats(options)

    if options.fields == 'help':
        # Drop the filter so every available event is listed.
        stats.fields_filter = None
        # Keep only the part before the first '(' and remove duplicates.
        names = {key.split('(', 1)[0] for key in stats.get().keys()}
        sys.stdout.write('  ' + '\n  '.join(sorted(names)) + '\n')
        sys.exit(0)

    if options.log or options.log_to_file:
        if options.log_to_file:
            signal.signal(signal.SIGHUP, handle_signal)
        keys = sorted(stats.get().keys())
        formatter = CSVFormat(keys) if options.csv else StdFormat(keys)
        log(stats, options, formatter, keys)
    elif options.once:
        batch(stats)
    else:
        with Tui(stats, options) as tui:
            tui.show_stats()
1816
1817
# Run the tool only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
1820