1# -*- Mode: python; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 40 -*-
2# vim: set filetype=python:
3# This Source Code Form is subject to the terms of the Mozilla Public
4# License, v. 2.0. If a copy of the MPL was not distributed with this
5# file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7from __future__ import (absolute_import, print_function)
8from abc import (ABCMeta, abstractmethod)
9from collections import deque
10import csv
11import os
12import re
13import subprocess
14from uuid import UUID
15
# GUID string identifying the Firefox session store window restored event;
# SessionStoreWindowRestored passes it to ClassicEvent for matching.
# This constant must match the event declared in
# toolkit/components/startup/mozprofilerprobe.mof
EVENT_ID_FIREFOX_WINDOW_RESTORED = '{917B96B1-ECAD-4DAB-A760-8D49027748AE}'
19
20
class XPerfSession(object):
    """ Holds the state shared by everything taking part in one xperf
    analysis: the active attributes, the events owned by those attributes,
    and the per-event mapping from field names to row indices.
    """

    def __init__(self):
        # Maps event name -> {field name: row index}
        self.evtkey = {}
        # Events that are currently matched against incoming rows
        self.evtset = set()
        # Attributes that are currently part of the session
        self.attrs = set()

    def is_empty(self):
        """ Return True when the session holds no attributes. """
        return len(self.attrs) == 0

    def add_field_mapping(self, event_name, field_mapping):
        """ Record the field-name-to-index mapping for |event_name|. """
        self.evtkey[event_name] = field_mapping

    def get_field_index(self, event_name, field_name):
        """ Return the row index of |field_name| for |event_name|. Raises
        KeyError when either name is unknown.
        """
        mapping = self.evtkey[event_name]
        return mapping[field_name]

    def add_attr(self, attr):
        """ Add |attr| and all of its events to the session. """
        owned_events = attr.get_events()
        self.evtset.update(owned_events)
        self.attrs.add(attr)

    def remove_attr_events(self, attr):
        """ Stop matching every event owned by |attr|. Events that are not
        currently in the session are ignored.
        """
        owned_events = attr.get_events()
        self.evtset.difference_update(owned_events)

    def remove_event(self, evt):
        """ Stop matching |evt|. Raises KeyError if it is not present. """
        self.evtset.remove(evt)

    def remove_attr(self, attr):
        """ Remove |attr| from the session. Raises KeyError if absent. """
        self.attrs.remove(attr)

    def match_events(self, row):
        """ Offer |row| to every event that is currently in the session. """
        # Matching an event can add or remove events from self.evtset, so
        # iterate over a snapshot rather than the live set.
        snapshot = self.evtset.copy()
        for event in snapshot:
            event.do_match(row)
59
60
class XPerfAttribute(object):
    """ Abstract base for every attribute. An attribute owns one or more
    events and accumulates statistics as those events fire.

    A non-persistent attribute retires each event as it fires; once no events
    remain it considers itself complete, process() runs, and its results may
    be retrieved. Persistent attributes never retire their events (see
    __init__).
    """

    __metaclass__ = ABCMeta

    # Keys for the dict returned by get_results:

    # Key whose value should be a dict containing any statistics that were
    # accumulated by this attribute.
    ACCUMULATIONS = 'XPerfAttribute.ACCUMULATIONS'

    # The class name of this attribute.
    NAME = 'XPerfAttribute.NAME'

    # The primary result of the attribute.
    RESULT = 'XPerfAttribute.RESULT'

    # Some attributes may themselves act as containers for other attributes.
    # The results of those contained attributes should be added to a dict that
    # is indexed by this key.
    SUB_ATTRIBUTES = 'XPerfAttribute.SUB_ATTRIBUTES'

    # Other constants:
    NON_PERSISTENT = False
    PERSISTENT = True

    def __init__(self, events, persistent=NON_PERSISTENT, **kwargs):
        """ Positional arguments:

        events -- a list of one or more events to associate with this
                  attribute. The list is kept by reference and is modified
                  as events are retired.

        Keyword arguments:

        persistent -- either XPerfAttribute.PERSISTENT or
                      XPerfAttribute.NON_PERSISTENT. A non-persistent
                      attribute retires each event when it occurs and is
                      complete once every event has been retired. A
                      persistent attribute never retires its events, which
                      suits attributes that gather data from an indefinite
                      number of events. One example is a counter of file I/O
                      events: it must not retire after the first event, but
                      keep counting until the end of the analysis.

        output -- an optional function taking a single argument, a reference
                  to this attribute. It is called as soon as the attribute's
                  results are available. Defaults to a no-op.
        """
        for event in events:
            event.set_attr(self)
        self.evtlist = events
        # Events that have already fired and been retired, in firing order
        self.seen_evtlist = []
        self.persistent = persistent
        self.output = kwargs.get('output', lambda a: None)

    def get_events(self):
        """ Return the list of events that have not been retired yet. """
        return self.evtlist

    def is_persistent(self):
        """ Return the persistence flag supplied to __init__. """
        return self.persistent

    def set_session(self, sess):
        """ Attach this attribute to |sess|. Passing a falsy value instead
        removes this attribute's events from the current session.
        """
        if not sess:
            self.sess.remove_attr_events(self)
        else:
            sess.add_attr(self)
        self.sess = sess

    def get_field_index(self, key, field):
        """ Look up the row index of |field| for event |key| via the session.
        """
        return self.sess.get_field_index(key, field)

    def on_event_matched(self, evt):
        """ Attributes that override this method should always call super().

        Called whenever one of this attribute's events matches the current
        row; the matching event is passed in as |evt|. Raises Exception when
        |evt| is not one of this attribute's pending events.
        """
        if evt not in self.evtlist:
            prefix = "Event mismatch: \"{!s}\" is not in this ".format(evt)
            raise Exception(prefix + "attribute's event list")

        self.accumulate(evt)

        if not self.persistent:
            # Only non-persistent attributes retire their events
            self.remove_event(evt)
            if not self.evtlist:
                # Every event has fired; the attribute is complete
                self.do_process()
            else:
                # Propagate the whiteboard from the current event to the next
                self.evtlist[0].set_whiteboard(evt.get_whiteboard())

    def remove_event(self, evt):
        """ Retire |evt|: move it to the seen list and drop it from the
        session.
        """
        self.evtlist.remove(evt)
        self.seen_evtlist.append(evt)
        self.sess.remove_event(evt)

    def do_process(self):
        """ Finish the attribute: leave the session, run process(), then hand
        the attribute to the output callback.
        """
        self.sess.remove_attr(self)
        self.process()
        self.output(self)

    def accumulate(self, evt):
        """ Optional hook that an attribute may implement in order to gather
        data across multiple events. The default does nothing.
        """
        pass

    @abstractmethod
    def process(self):
        """ Called once all of the attribute's events have been retired. """
        pass

    @abstractmethod
    def get_results(self):
        """ Retrieve the attribute's results as a dict whose keys are any of
        the constants declared at the top of this class. At the very least,
        the XPerfAttribute.NAME and XPerfAttribute.RESULT keys must be set.
        """
        pass
200
201
class XPerfInterval(XPerfAttribute):
    """ Measures the time that elapses between a start event and an end
    event. Optional sub-attributes may be supplied; they are attached to the
    interval's session when the interval's events fire.
    """
    def __init__(self, startevt, endevt, attrs=None, **kwargs):
        """ Positional arguments:

        startevt -- the event that opens the interval.
        endevt -- the event that closes the interval.

        Keyword arguments:

        attrs -- an attribute, or a list of attributes, to run as
                 sub-attributes of this interval. Defaults to none.

        Remaining keyword arguments are forwarded to XPerfAttribute.
        """
        super(XPerfInterval, self).__init__([startevt, endevt], **kwargs)
        # Normalize |attrs| into a list
        if not attrs:
            attrs = []
        elif not isinstance(attrs, list):
            attrs = [attrs]
        self.attrs_during_interval = attrs

    def on_event_matched(self, evt):
        # NOTE(review): for a non-persistent interval the start event has
        # already been removed from self.evtlist by the time the end event
        # fires, so the end event also compares equal to self.evtlist[0] and
        # takes the first branch. Confirm whether that is intended before
        # changing this logic.
        if evt == self.evtlist[0]:
            # When we see our start event, we need to activate our
            # sub-attributes by setting their session to the same as ours.
            switch_sessions, new_session = True, self.sess
        elif evt == self.evtlist[-1]:
            # When we see our end event, we need to deactivate our
            # sub-attributes by setting their session to None.
            switch_sessions, new_session = True, None
        else:
            switch_sessions, new_session = False, None

        if switch_sessions:
            for sub_attr in self.attrs_during_interval:
                sub_attr.set_session(new_session)

        super(XPerfInterval, self).on_event_matched(evt)

    def process(self):
        """ Forward the process call to each sub-attribute. """
        for sub_attr in self.attrs_during_interval:
            sub_attr.process()

    def __str__(self):
        last = self.seen_evtlist[-1]
        first = self.seen_evtlist[0]
        elapsed = last.get_timestamp() - first.get_timestamp()
        pieces = ["Interval from [{!s}] to [{!s}] took [{:.3f}]"
                  " milliseconds.".format(first, last, elapsed)]
        if self.attrs_during_interval:
            pieces.append(" Within this interval:")
            for sub_attr in self.attrs_during_interval:
                pieces.append(" {!s}".format(sub_attr))
        pieces.append("\nStart: [{}]".format(first.get_timestamp()))
        pieces.append(" End: [{}]".format(last.get_timestamp()))
        return "".join(pieces)

    def get_results(self):
        """ The result of an XPerf interval is its duration: the difference
        between the timestamps of the last and first events seen. The results
        of any sub-attributes are included under SUB_ATTRIBUTES.
        """
        last = self.seen_evtlist[-1]
        first = self.seen_evtlist[0]
        elapsed = last.get_timestamp() - first.get_timestamp()

        results = {XPerfAttribute.NAME: self.__class__.__name__,
                   XPerfAttribute.RESULT: elapsed}

        nested = [sub_attr.get_results()
                  for sub_attr in self.attrs_during_interval]
        if nested:
            results[XPerfAttribute.SUB_ATTRIBUTES] = nested

        return results
267
268
class XPerfCounter(XPerfAttribute):
    """ This persistent attribute computes the number of occurrences of the
    event specified to __init__. It can also accumulate additional data from
    the events.
    """

    def __init__(self, evt, **kwargs):
        """ Positional parameters:

        evt -- The event to be counted.

        Keyword arguments:

        filters -- An optional dictionary of filters used to screen out
                   unwanted events. Each key is one of the XPerfEvent
                   constants, and each value is a function that is called
                   with the corresponding value from the event's whiteboard;
                   the event is skipped when the function returns a falsy
                   value. Keys absent from the whiteboard are ignored.

        Remaining keyword arguments are forwarded to XPerfAttribute.
        """
        super(XPerfCounter, self).__init__([evt], XPerfAttribute.PERSISTENT,
                                           **kwargs)
        # Running totals keyed by accumulatable field name
        self.values = dict()
        self.count = 0
        self.filters = kwargs.get('filters', dict())

    def accumulate(self, evt):
        """ Count |evt| unless a filter rejects it, and add up any fields
        that the event marks as accumulatable.
        """
        data = evt.get_whiteboard()

        # items() rather than iteritems() so this also runs on Python 3
        for (key, comp) in self.filters.items():
            try:
                testdata = data[key]
            except KeyError:
                # The event does not carry this value; the filter is skipped
                continue
            if not comp(testdata):
                return

        self.count += 1

        # Not every event publishes accumulatable fields; those events are
        # simply counted.
        fields = data.get(XPerfEvent.EVENT_ACCUMULATABLE_FIELDS, ())

        for f in fields:
            value = data[f]
            if f in self.values:
                self.values[f] += value
            else:
                self.values[f] = value

    def process(self):
        """ Retire the counted event. Because the counter is persistent, this
        is the only point at which the event moves to seen_evtlist.
        """
        self.remove_event(self.evtlist[0])

    def __str__(self):
        msg = "[{!s}] events of type [{!s}]".format((self.count),
                                                    (self.seen_evtlist[0]))
        if self.values:
            msg += " with accumulated"
            for (k, v) in self.values.items():
                msg += " [[{!s}] == {!s}]".format((k), (v))
        return msg

    def get_results(self):
        """ The result of a counter is the number of counted events. Any
        accumulated field totals are included under ACCUMULATIONS.
        """
        results = {XPerfAttribute.NAME: self.__class__.__name__,
                   XPerfAttribute.RESULT: self.count}

        if self.values:
            results[XPerfAttribute.ACCUMULATIONS] = self.values

        return results
340
341
class XPerfEvent(object):
    """ Base class for all events. An important feature of this class is the
    whiteboard variable. This variable allows for passing values between
    successive events that are *owned by the same attribute*.

    This allows, for example, a thread ID from a scheduler event to be consumed
    by a subsequent event that only wants to fire for particular thread IDs.
    """

    # These keys are used to reference accumulated data that is passed across
    # events by |self.whiteboard|:

    # The pid recorded by a process or thread related event
    EVENT_DATA_PID = 'pid'
    # The command line recorded by a ProcessStart event
    EVENT_DATA_CMD_LINE = 'cmd_line'
    # The tid recorded by a thread related event
    EVENT_DATA_TID = 'tid'
    # Number of bytes recorded by an event that contains such quantities
    EVENT_NUM_BYTES = 'num_bytes'
    # File name recorded by an I/O event
    EVENT_FILE_NAME = 'file_name'
    # Set of field names that may be accumulated by an XPerfCounter. The
    # counter uses this to query the whiteboard for other EVENT_* keys that
    # contain values that should be accumulated.
    EVENT_ACCUMULATABLE_FIELDS = 'accumulatable_fields'

    # Cached row index of the 'TimeStamp' field, shared by all events
    timestamp_index = None

    def __init__(self, key):
        """ |key| is the event name that is compared against the first field
        of each row.
        """
        self.key = key
        self.whiteboard = dict()

    def set_attr(self, attr):
        """ Set the attribute (or event expression) that owns this event. """
        self.attr = attr

    def get_attr(self):
        return self.attr

    def set_whiteboard(self, data):
        """ Replace this event's whiteboard with |data| (kept by reference).
        """
        self.whiteboard = data

    def get_whiteboard(self):
        return self.whiteboard

    def get_field_index(self, field):
        """ Return the row index of |field| for this event's key, as resolved
        by the owning attribute.
        """
        return self.attr.get_field_index(self.key, field)

    def do_match(self, row):
        """ Test |row| against this event. On a match, record the row's
        timestamp (in milliseconds), notify the owning attribute, and return
        True; otherwise return False.
        """
        if not self.match(row):
            return False

        # All events use the same index for timestamps, so timestamp_index can
        # be a class variable. Compare against None so that a cached index of
        # zero is not mistaken for "not looked up yet".
        if XPerfEvent.timestamp_index is None:
            XPerfEvent.timestamp_index = self.get_field_index('TimeStamp')

        # Convert microseconds to milliseconds
        self.timestamp = float(row[XPerfEvent.timestamp_index]) / 1000.0
        self.attr.on_event_matched(self)
        return True

    def match(self, row):
        """ Return True when the first field of |row| equals this event's key.
        Subclasses extend this with additional criteria.
        """
        return self.key == row[0]

    def get_timestamp(self):
        """ Return the timestamp, in milliseconds, of the last matched row.
        Only available after do_match has returned True.
        """
        return self.timestamp
409
410
class EventExpression(object):
    """ An optional layer between attributes and events that lets the user
    compose several events into one more complex event. Implementations must
    provide the XPerfEvent interface, so that the attribute that owns them can
    drive them, and the XPerfAttribute interface, so that the events they
    wrap see them as their attribute.
    """

    __metaclass__ = ABCMeta

    def __init__(self, events):
        """ |events| is either a single event or a list of events. This
        expression becomes the attribute of each one.
        """
        # Treat a lone event as a one-element list so both cases share a loop
        wrapped = events if isinstance(events, list) else [events]
        for event in wrapped:
            event.set_attr(self)

    def set_attr(self, attr):
        """ Set the attribute (or enclosing expression) that owns this
        expression.
        """
        self.attr = attr

    def get_attr(self):
        return self.attr

    def get_field_index(self, key, field):
        """ Forward the field index lookup to the owning attribute. """
        owner = self.attr
        return owner.get_field_index(key, field)

    @abstractmethod
    def set_whiteboard(self, data):
        pass

    @abstractmethod
    def get_whiteboard(self):
        pass

    @abstractmethod
    def on_event_matched(self, evt):
        pass

    @abstractmethod
    def do_match(self, row):
        pass

    @abstractmethod
    def get_timestamp(self):
        pass
459
460
class Nth(EventExpression):
    """ This is a simple EventExpression that does not fire until the Nth
    occurrence of the event that it encapsulates.
    """
    def __init__(self, N, event):
        """ Positional arguments:

        N -- the occurrence of |event| on which this expression fires.
        event -- the event to be counted.
        """
        super(Nth, self).__init__(event)
        self.event = event
        self.N = N
        self.match_count = 0

    def on_event_matched(self, evt):
        """ Count a match of the wrapped event and notify the owning
        attribute when the count reaches N. Raises Exception if |evt| is not
        the wrapped event.
        """
        if evt != self.event:
            raise Exception(
                "Nth expression for event " +
                "\"%s\" fired for event \"%s\" instead" % (self.event, evt))
        self.match_count += 1
        if self.match_count == self.N:
            self.attr.on_event_matched(self)

    def set_whiteboard(self, data):
        self.event.set_whiteboard(data)

    def get_whiteboard(self):
        return self.event.get_whiteboard()

    def do_match(self, row):
        self.event.do_match(row)

    def get_timestamp(self):
        return self.event.get_timestamp()

    def get_suffix(self):
        """ Return the English ordinal suffix ('st', 'nd', 'rd' or 'th') for
        N.
        """
        digits = str(self.N)
        # 11, 12 and 13 (and 111, 212, ...) take 'th' despite ending in 1-3
        if digits[-2:] in ('11', '12', '13'):
            return 'th'
        return {'1': 'st', '2': 'nd', '3': 'rd'}.get(digits[-1], 'th')

    def __str__(self):
        suffix = self.get_suffix()
        return "{!s}{} [{!s}]".format((self.N), (suffix), (self.event))
506
507
class EventSequence(EventExpression):
    """ An EventExpression made of several events that must fire in the
    order given. The sequence itself fires once its final event has been
    received.

    A point worth noting is what happens when the owning attribute is
    persistent. Since a persistent attribute never retires its events, the
    sequence keeps firing every time its final event fires again. The user can
    therefore express "once all of these other events have triggered, fire
    this last one repeatedly for the remainder of the analysis."
    """
    def __init__(self, *events):
        """ |events| are the events of the sequence, in firing order. Raises
        Exception when fewer than two are supplied.
        """
        super(EventSequence, self).__init__(list(events))
        supplied = len(events)
        if supplied < 2:
            raise Exception(
                'EventSequence requires at least two events, %d provided' %
                supplied)
        # Events still waiting to fire, next one first
        self.events = deque(events)
        # Events that have fired, in order
        self.seen_events = []

    def on_event_matched(self, evt):
        """ Advance the sequence. |evt| must be the next pending event or,
        once the sequence is exhausted, the final event firing again;
        anything else raises Exception.
        """
        unseen_events = len(self.events) > 0
        expected = self.events[0] if unseen_events else self.seen_events[-1]
        if evt != expected:
            raise Exception(
                'Unexpected event "%s" is not a member of this event sequence'
                % (evt)
                )

        if unseen_events:
            # Move the event from the pending queue to seen_events
            self.events.popleft()
            self.seen_events.append(evt)

        if not self.events:
            # We have run all of our events; notify the attribute
            self.attr.on_event_matched(self)
        else:
            # Transfer attr data to the next event that will run
            self.events[0].set_whiteboard(evt.get_whiteboard())

    def set_whiteboard(self, data):
        """ Hand |data| to the next pending event. """
        next_event = self.events[0]
        next_event.set_whiteboard(data)

    def get_whiteboard(self):
        """ Return the whiteboard of the most recently fired event. """
        latest = self.seen_events[-1]
        return latest.get_whiteboard()

    def do_match(self, row):
        """ Offer |row| to the event that is currently expected. """
        if self.attr.is_persistent() and not self.events:
            # Persistent attributes may repeatedly match the final event
            target = self.seen_events[-1]
        else:
            target = self.events[0]
        target.do_match(row)

    def get_timestamp(self):
        """ Return the timestamp of the most recently fired event. """
        latest = self.seen_events[-1]
        return latest.get_timestamp()

    def __str__(self):
        clauses = ["When [{!s}], ".format(e) for e in self.seen_events[:-1]]
        clauses.append("then [{!s}]".format(self.seen_events[-1]))
        return "".join(clauses)
573
574
class BindThread(EventExpression):
    """ An event expression that ties the event it wraps to one thread ID.
    The wrapped event is only offered rows whose ThreadID equals the thread
    ID most recently supplied through the whiteboard.
    """
    def __init__(self, event):
        super(BindThread, self).__init__(event)
        self.event = event
        # Thread ID to bind to; filled in by set_whiteboard
        self.tid = None

    def on_event_matched(self, evt):
        """ Relay a match of the wrapped event to the owning attribute.
        Raises Exception if |evt| is not the wrapped event.
        """
        if evt != self.event:
            raise Exception(
                "BindThread expression for event " +
                "\"%s\" fired for event \"%s\" instead" % (self.event, evt))
        owner = self.attr
        owner.on_event_matched(self)

    def set_whiteboard(self, data):
        """ Capture the thread ID from |data| and pass |data| on to the
        wrapped event. Raises KeyError when |data| carries no thread ID.
        """
        self.tid = data[XPerfEvent.EVENT_DATA_TID]
        self.event.set_whiteboard(data)

    def get_whiteboard(self):
        return self.event.get_whiteboard()

    def do_match(self, row):
        """ Offer |row| to the wrapped event only when the row's ThreadID
        equals the bound thread ID.
        """
        try:
            tid_index = self.get_field_index(row[0], 'ThreadID')
        except KeyError:
            # Not every event has a thread ID. We don't care about those.
            return

        row_tid = int(row[tid_index])
        if row_tid == self.tid:
            self.event.do_match(row)

    def get_timestamp(self):
        return self.event.get_timestamp()

    def __str__(self):
        return "[{!s}] bound to thread [{!s}]".format(self.event, self.tid)
614
615
class ClassicEvent(XPerfEvent):
    """ Classic ETW events are differentiated via a GUID. This class
    implements the boilerplate for matching those events.
    """
    # Cached row index of the 'EventGuid' field, shared by all instances
    guid_index = None

    def __init__(self, guidstr):
        """ |guidstr| is the GUID of the event to match, in any string form
        accepted by uuid.UUID.
        """
        super(ClassicEvent, self).__init__('UnknownEvent/Classic')
        self.guid = UUID(guidstr)

    def match(self, row):
        """ Return True when |row| is a classic event whose EventGuid field
        equals this event's GUID.
        """
        if not super(ClassicEvent, self).match(row):
            return False

        # Compare against None so a cached index of zero is not looked up
        # again on every row.
        if ClassicEvent.guid_index is None:
            ClassicEvent.guid_index = self.get_field_index('EventGuid')

        guid = UUID(row[ClassicEvent.guid_index])
        return guid.int == self.guid.int

    def __str__(self):
        return "User event (classic): [{{{!s}}}]".format((self.guid))
638
639
class SessionStoreWindowRestored(ClassicEvent):
    """ The Firefox session store window restored event: a classic event
    matched on the EVENT_ID_FIREFOX_WINDOW_RESTORED GUID.
    """
    def __init__(self):
        guid = EVENT_ID_FIREFOX_WINDOW_RESTORED
        super(SessionStoreWindowRestored, self).__init__(guid)

    def __str__(self):
        description = "Firefox Session Store Window Restored"
        return description
648
649
class ProcessStart(XPerfEvent):
    """ Fires on a P-Start row whose executable name equals the leaf name
    given to __init__ (compared case-insensitively). On a match the process
    ID and the tokenized command line are recorded in the whiteboard.
    """
    # Cached row indices, shared by all instances
    cmd_line_index = None
    process_index = None
    # Splits a 'name ( pid)' field into the name and the pid
    extractor = re.compile(r'^(.+) \(\s*(\d+)\)$')

    def __init__(self, leafname):
        super(ProcessStart, self).__init__('P-Start')
        self.leafname = leafname.lower()

    @staticmethod
    def tokenize_cmd_line(cmd_line_str):
        """ Split |cmd_line_str| on spaces, keeping double-quoted sections
        together, and return the list of tokens with any surrounding double
        quotes stripped. Runs of spaces do not produce empty tokens; an
        explicitly quoted empty argument ("") still yields an empty string.
        """
        result = []
        quoted = False
        current = str()

        for c in cmd_line_str:
            if c == '"':
                # Quotes toggle the quoted state and stay in the token until
                # the final strip, so that "" survives as an empty argument.
                quoted = not quoted
            elif c == ' ' and not quoted:
                if current:
                    result.append(current)
                    current = str()
                continue

            current += c

        # Capture the final token
        if current:
            result.append(current)

        return [t.strip('"') for t in result]

    def match(self, row):
        """ Return True when |row| is a P-Start row for this event's
        executable, updating the whiteboard as a side effect. Rows whose
        process field is not in 'name ( pid)' form do not match.
        """
        if not super(ProcessStart, self).match(row):
            return False

        if ProcessStart.process_index is None:
            ProcessStart.process_index = self.get_field_index(
                'Process Name ( PID)')

        m = ProcessStart.extractor.match(row[ProcessStart.process_index])
        if m is None:
            return False

        executable = m.group(1).lower()
        if executable != self.leafname:
            return False

        pid = int(m.group(2))

        if ProcessStart.cmd_line_index is None:
            ProcessStart.cmd_line_index = self.get_field_index('Command Line')

        cmd_line = row[ProcessStart.cmd_line_index]
        cmd_line_tokens = ProcessStart.tokenize_cmd_line(cmd_line)

        self.whiteboard[XPerfEvent.EVENT_DATA_PID] = pid

        # Command lines are kept per pid so that several matching processes
        # can share one whiteboard.
        cmd_line_dict = self.whiteboard.setdefault(
            XPerfEvent.EVENT_DATA_CMD_LINE, {})
        cmd_line_dict[pid] = cmd_line_tokens

        return True

    def __str__(self):
        return "Start of a [{!s}] process".format((self.leafname))
721
722
class ThreadStart(XPerfEvent):
    """ ThreadStart only fires for threads whose process matches the
    XPerfEvent.EVENT_DATA_PID entry in the whiteboard. On a match the thread
    ID is recorded in the whiteboard under XPerfEvent.EVENT_DATA_TID.
    """
    # Cached row indices, shared by all instances
    process_index = None
    tid_index = None
    # Extracts the pid from a 'name ( pid)' field
    pid_extractor = re.compile(r'^.+ \(\s*(\d+)\)$')

    def __init__(self):
        super(ThreadStart, self).__init__('T-Start')

    def match(self, row):
        """ Return True when |row| is a T-Start row that belongs to the
        process recorded in the whiteboard. Rows whose process field is not
        in 'name ( pid)' form do not match, and nothing matches while the
        whiteboard holds no process ID.
        """
        if not super(ThreadStart, self).match(row):
            return False

        if ThreadStart.process_index is None:
            ThreadStart.process_index = self.get_field_index(
                'Process Name ( PID)')

        m = ThreadStart.pid_extractor.match(row[ThreadStart.process_index])
        if m is None:
            return False

        try:
            expected_pid = self.whiteboard[XPerfEvent.EVENT_DATA_PID]
        except KeyError:
            # No process has been recorded yet, so no thread can belong to it
            return False

        if expected_pid != int(m.group(1)):
            return False

        if ThreadStart.tid_index is None:
            ThreadStart.tid_index = self.get_field_index('ThreadID')

        self.whiteboard[XPerfEvent.EVENT_DATA_TID] = \
            int(row[ThreadStart.tid_index])
        return True

    def __str__(self):
        s = "Thread start in process [{}]".format(
            (self.whiteboard[XPerfEvent.EVENT_DATA_PID]))
        return s
757
758
class ReadyThread(XPerfEvent):
    """ ReadyThread only fires for the last thread whose ID was recorded in the
    whiteboard via the XPerfEvent.EVENT_DATA_TID key.
    """

    # Cached row index of the 'Rdy TID' field, shared by all instances
    tid_index = None

    def __init__(self):
        super(ReadyThread, self).__init__('ReadyThread')

    def match(self, row):
        """ Return True when |row| is a ReadyThread row whose 'Rdy TID' field
        equals the thread ID in the whiteboard. Nothing matches while the
        whiteboard holds no thread ID.
        """
        if not super(ReadyThread, self).match(row):
            return False

        if ReadyThread.tid_index is None:
            ReadyThread.tid_index = self.get_field_index('Rdy TID')

        try:
            expected_tid = self.whiteboard[XPerfEvent.EVENT_DATA_TID]
        except KeyError:
            return False

        return expected_tid == int(row[ReadyThread.tid_index])

    def __str__(self):
        return "Thread [{!s}] is ready".format(
            (self.whiteboard[XPerfEvent.EVENT_DATA_TID]))
788
789
class ContextSwitchToThread(XPerfEvent):
    """ ContextSwitchToThread only fires for the last thread whose ID was
    recorded in the whiteboard via the XPerfEvent.EVENT_DATA_TID key.
    """

    # Cached row index of the 'New TID' field, shared by all instances
    tid_index = None

    def __init__(self):
        super(ContextSwitchToThread, self).__init__('CSwitch')

    def match(self, row):
        """ Return True when |row| is a CSwitch row whose 'New TID' field
        equals the thread ID in the whiteboard. Nothing matches while the
        whiteboard holds no thread ID.
        """
        if not super(ContextSwitchToThread, self).match(row):
            return False

        if ContextSwitchToThread.tid_index is None:
            ContextSwitchToThread.tid_index = self.get_field_index('New TID')

        try:
            expected_tid = self.whiteboard[XPerfEvent.EVENT_DATA_TID]
        except KeyError:
            return False

        return expected_tid == int(row[ContextSwitchToThread.tid_index])

    def __str__(self):
        return "Context switch to thread " + \
               "[{!s}]".format((self.whiteboard[XPerfEvent.EVENT_DATA_TID]))
816
817
class FileIOReadOrWrite(XPerfEvent):
    """ Fires on every FileIoRead or FileIoWrite row, depending on the verb
    given to __init__. Each match records the thread ID, byte count and file
    name in the whiteboard and marks the byte count as accumulatable.
    """
    # Valid values for the |verb| argument of __init__
    READ = 0
    WRITE = 1

    # Cached row indices, shared by all instances.
    # NOTE(review): the cache is shared between read and write events, which
    # presumes both row types use the same field layout -- confirm.
    tid_index = None
    num_bytes_index = None
    file_name_index = None

    def __init__(self, verb):
        """ |verb| is either FileIOReadOrWrite.READ or
        FileIOReadOrWrite.WRITE; any other value raises Exception.
        """
        if verb == FileIOReadOrWrite.WRITE:
            evt_name = 'FileIoWrite'
            self.strverb = 'Write'
        elif verb == FileIOReadOrWrite.READ:
            evt_name = 'FileIoRead'
            self.strverb = 'Read'
        else:
            raise Exception('Invalid verb argument to FileIOReadOrWrite')

        super(FileIOReadOrWrite, self).__init__(evt_name)

        self.verb = verb

    def match(self, row):
        """ Return True when |row| is a row for this event's verb, updating
        the whiteboard as a side effect.
        """
        if not super(FileIOReadOrWrite, self).match(row):
            return False

        # Compare against None so a cached index of zero is not looked up
        # again on every row.
        if FileIOReadOrWrite.tid_index is None:
            FileIOReadOrWrite.tid_index = self.get_field_index('ThreadID')

        if FileIOReadOrWrite.num_bytes_index is None:
            FileIOReadOrWrite.num_bytes_index = self.get_field_index('Size')

        if FileIOReadOrWrite.file_name_index is None:
            FileIOReadOrWrite.file_name_index = \
                self.get_field_index('FileName')

        self.whiteboard[XPerfEvent.EVENT_DATA_TID] = \
            int(row[FileIOReadOrWrite.tid_index])
        # Base 0 lets int() honour a prefix such as 0x in the Size field
        self.whiteboard[XPerfEvent.EVENT_NUM_BYTES] = \
            int(row[FileIOReadOrWrite.num_bytes_index], 0)
        self.whiteboard[XPerfEvent.EVENT_FILE_NAME] = \
            row[FileIOReadOrWrite.file_name_index].strip('"')
        self.whiteboard[XPerfEvent.EVENT_ACCUMULATABLE_FIELDS] = \
            {XPerfEvent.EVENT_NUM_BYTES}

        return True

    def __str__(self):
        return "File I/O Bytes {}".format((self.strverb))
867
868
class XPerfFile(object):
    """ This class is the main entry point into xperf analysis. The user should
    create one or more attributes, add them via add_attr(), and then call
    analyze() to run.

    Instances are meant to be used as context managers: entering the context
    opens the CSV data via load(), and leaving it closes the CSV file and,
    unless csvout or keepcsv say otherwise, removes the CSV file.
    """

    def __init__(self, xperf_path=None, debug=False, **kwargs):
        """ Keyword arguments:

        debug -- When True, enables additional diagnostics
        etlfile -- Path to a merged .etl file to use for the analysis.
        etluser -- Path to a user-mode .etl file to use for the analysis. It
                   will be merged with the supplied kernel-mode .etl file
                   before running the analysis.
        etlkernel -- Path to a kernel-mode .etl file to use for the analysis.
                     It will be merged with the supplied user-mode .etl file
                     before running the analysis.
        csvfile -- Path to a CSV file that was previously exported using xperf.
                   This file will be used for the analysis.
        csvout -- When used with either the etlfile option or the (etluser and
                  etlkernel) option, specifies the path to use for the exported
                  CSV file. A value of None is treated the same as omitting
                  the option.
        keepcsv -- When true, any CSV file generated during the analysis will
                   be left on the file system. Otherwise, the CSV file will be
                   removed once the analysis is complete.
        xperf_path -- Absolute path to xperf.exe. When absent, XPerfFile will
                      attempt to resolve xperf via the system PATH.

        Raises Exception when neither etl nor csv inputs are supplied.
        """

        self.csv_fd = None
        self.csvfile = None
        self.csvout = None
        self.debug = debug
        self.etlfile = None
        self.keepcsv = False
        self.xperf_path = xperf_path

        if 'etlfile' in kwargs:
            self.etlfile = os.path.abspath(kwargs['etlfile'])
        elif 'etluser' in kwargs and 'etlkernel' in kwargs:
            self.etlfile = self.etl_merge_user_kernel(**kwargs)
        elif 'csvfile' not in kwargs:
            raise Exception('Missing parameters: etl or csv files required')

        if self.etlfile:
            # Callers that forward an argparse namespace pass csvout=None
            # when the option was omitted; os.path.abspath(None) would raise,
            # so only resolve the path when a value was actually given.
            csvout = kwargs.get('csvout')
            if csvout:
                self.csvout = os.path.abspath(csvout)
            self.csvfile = self.etl2csv()
        else:
            self.csvfile = os.path.abspath(kwargs['csvfile'])

        # If we've been supplied a csvfile, assume by default that we don't
        # want that file deleted by us.
        self.keepcsv = kwargs.get('keepcsv', 'csvfile' in kwargs)

        self.sess = XPerfSession()

    def add_attr(self, attr):
        """ Register attr with this analysis by handing it our session. """
        attr.set_session(self.sess)

    def get_xperf_path(self):
        """ Return the path to xperf.exe.

        The path supplied to the constructor wins. Otherwise each entry of
        the PATH environment variable is probed for a readable, executable
        xperf.exe and the first hit is cached in self.xperf_path.

        Raises Exception when xperf cannot be found.
        """
        if self.xperf_path:
            return self.xperf_path

        leaf_name = 'xperf.exe'
        access_flags = os.R_OK | os.X_OK
        for entry in os.environ['PATH'].split(os.pathsep):
            candidate = os.path.join(entry, leaf_name)
            if os.access(candidate, access_flags):
                self.xperf_path = os.path.abspath(candidate)
                return self.xperf_path

        raise Exception('Cannot find xperf')

    def etl_merge_user_kernel(self, **kwargs):
        """ Merge the etluser and etlkernel traces using 'xperf -merge'.

        The merged trace is written as 'merged.etl' next to the user-mode
        trace. Returns the absolute path of the merged file.
        """
        user = os.path.abspath(kwargs['etluser'])
        kernel = os.path.abspath(kwargs['etlkernel'])
        merged = os.path.join(os.path.dirname(user), 'merged.etl')

        xperf_cmd = [self.get_xperf_path(), '-merge', user, kernel, merged]
        if self.debug:
            print("Executing '%s'" % subprocess.list2cmdline(xperf_cmd))
        # NOTE: the exit status of xperf is not inspected here.
        subprocess.call(xperf_cmd)
        return merged

    def etl2csv(self):
        """ Export self.etlfile to CSV using 'xperf -i ... -o ...'.

        The output goes to self.csvout when set; otherwise it is placed
        beside the .etl file with the extension replaced by '.csv'. Returns
        the path of the CSV file.
        """
        if self.csvout:
            abs_csv_name = self.csvout
        else:
            (base, leaf) = os.path.split(self.etlfile)
            stem = os.path.splitext(leaf)[0]
            abs_csv_name = os.path.join(base, "{}.csv".format(stem))

        xperf_cmd = [self.get_xperf_path(), '-i', self.etlfile, '-o',
                     abs_csv_name]
        if self.debug:
            print("Executing '%s'" % subprocess.list2cmdline(xperf_cmd))
        # NOTE: the exit status of xperf is not inspected here.
        subprocess.call(xperf_cmd)
        return abs_csv_name

    def __enter__(self):
        """ Open the CSV data; raises Exception if load() reports failure. """
        if not self.load():
            raise Exception('Load failed')
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """ Close the CSV file and remove it unless csvout or keepcsv is set.
        """
        if self.csv_fd:
            self.csv_fd.close()
        if not self.csvout and not self.keepcsv:
            os.remove(self.csvfile)

    def load(self):
        """ Open self.csvfile and set self.data to a generator of data rows.

        Returns False when no CSV file is configured, True otherwise.
        """
        if not self.csvfile:
            return False

        if str is bytes:
            # Python 2: the csv module wants the file opened in binary mode.
            self.csv_fd = open(self.csvfile, 'rb')
        else:
            # Python 3: the csv module wants text mode with newline=''.
            # Undecodable bytes are replaced rather than aborting the run.
            self.csv_fd = open(self.csvfile, 'r', newline='',
                               errors='replace')

        self.data = self.filter_xperf_header(csv.reader(self.csv_fd,
                                                        delimiter=',',
                                                        quotechar='"',
                                                        quoting=csv.QUOTE_NONE,
                                                        skipinitialspace=True))

        return True

    def filter_xperf_header(self, csvdata):
        """ Generator that consumes the xperf CSV header and yields data rows.

        Rows before 'BeginHeader' are ignored. Every row between
        'BeginHeader' and 'EndHeader' is recorded in the session as a mapping
        of field name to column index for the event named in column 0. The
        first non-empty row after 'EndHeader' is skipped; every later
        non-empty row is yielded. Rows that make the csv reader raise
        csv.Error are dropped.
        """
        XPERF_CSV_NO_HEADER = -1
        XPERF_CSV_IN_HEADER = 0
        XPERF_CSV_END_HEADER_SEEN = 1
        XPERF_CSV_PAST_HEADER = 2

        state = XPERF_CSV_NO_HEADER

        while True:
            try:
                # The next() builtin works on both Python 2 and Python 3
                # iterators, unlike the Python 2-only .next() method.
                row = next(csvdata)
            except StopIteration:
                break
            except csv.Error:
                continue

            if not row:
                continue

            if state < XPERF_CSV_IN_HEADER:
                if row[0] == "BeginHeader":
                    state = XPERF_CSV_IN_HEADER
                continue

            if state == XPERF_CSV_IN_HEADER:
                if row[0] == "EndHeader":
                    state = XPERF_CSV_END_HEADER_SEEN
                    continue

                # Map field names to indices; column 0 holds the event name,
                # so the first field lives at index 1.
                self.sess.add_field_mapping(
                    row[0],
                    {name: index for index, name in enumerate(row[1:], 1)})
                continue

            # Advancing the state on every row past the header means the
            # first such row only moves us to PAST_HEADER and is not yielded.
            if state >= XPERF_CSV_END_HEADER_SEEN:
                state += 1

            if state > XPERF_CSV_PAST_HEADER:
                yield row

    def analyze(self):
        """ Feed each data row to the session until the rows run out or the
        session has no attributes left.
        """
        for row in self.data:
            self.sess.match_events(row)
            if self.sess.is_empty():
                # No more attrs to look for, we might as well quit
                return
1046
1047
if __name__ == "__main__":
    def main():
        """ Command-line driver: parse the input mode (etl, etls or csv) and
        run a set of sample analyses against the resulting XPerfFile.
        """
        import argparse

        def add_interim_csv_options(subparser):
            # Options shared by the modes that export an interim csv file
            subparser.add_argument(
                '--csvout', required=False,
                help='Specify a path to save the interim csv file to disk')
            subparser.add_argument(
                '--keepcsv', required=False, action='store_true',
                help='Do not delete the interim csv file that was written '
                     'to disk')

        parser = argparse.ArgumentParser()
        modes = parser.add_subparsers()

        single_etl = modes.add_parser(
            'etl', help='Input consists of one .etl file')
        single_etl.add_argument(
            "etlfile", type=str,
            help="Path to a single .etl containing merged kernel "
                 "and user mode data")
        add_interim_csv_options(single_etl)

        split_etls = modes.add_parser(
            'etls', help='Input consists of two .etl files')
        split_etls.add_argument(
            "--user", type=str, dest='etluser', required=True,
            help="Path to a user-mode .etl file")
        split_etls.add_argument(
            "--kernel", type=str, dest='etlkernel', required=True,
            help="Path to a kernel-mode .etl file")
        add_interim_csv_options(split_etls)

        existing_csv = modes.add_parser(
            'csv', help='Input consists of one .csv file')
        existing_csv.add_argument(
            "csvfile", type=str,
            help="Path to a .csv file generated by xperf")
        # We always imply --keepcsv when running in csv mode
        existing_csv.add_argument(
            '--keepcsv', required=False, help=argparse.SUPPRESS,
            action='store_true', default=True)

        cli_args = parser.parse_args()

        # This is merely sample code for running analyses.

        with XPerfFile(**vars(cli_args)) as etl:
            def null_output(attr):
                pass

            def print_output(attr):
                print(str(attr))

            def structured_output(attr):
                results = attr.get_results()
                print("Results: [{!r}]".format(results))

            def test_filter_exclude_dll(file):
                extension = os.path.splitext(file)[1]
                return extension.lower() != '.dll'

            def second_firefox_start():
                # Matches the second firefox.exe process to start
                return Nth(2, ProcessStart('firefox.exe'))

            myfilters = {XPerfEvent.EVENT_FILE_NAME: test_filter_exclude_dll}

            # From the first firefox.exe start until the window is restored
            etl.add_attr(XPerfInterval(ProcessStart('firefox.exe'),
                                       SessionStoreWindowRestored(),
                                       output=print_output))

            # From the first firefox.exe start until the ready sequence
            first_start = ProcessStart('firefox.exe')
            ready_sequence = EventSequence(second_firefox_start(),
                                           ThreadStart(), ReadyThread())
            etl.add_attr(XPerfInterval(first_start, ready_sequence,
                                       output=structured_output))

            read_sequence = EventSequence(
                second_firefox_start(), ThreadStart(),
                BindThread(FileIOReadOrWrite(FileIOReadOrWrite.READ)))
            read_counter = XPerfCounter(read_sequence,
                                        output=structured_output,
                                        filters=myfilters)

            write_sequence = EventSequence(
                second_firefox_start(), ThreadStart(),
                BindThread(FileIOReadOrWrite(FileIOReadOrWrite.WRITE)))
            write_counter = XPerfCounter(write_sequence,
                                         output=structured_output)

            # This is equivalent to the old-style xperf test (with launcher)
            read_interval = XPerfInterval(second_firefox_start(),
                                          SessionStoreWindowRestored(),
                                          read_counter,
                                          output=structured_output)
            etl.add_attr(read_interval)

            write_interval = XPerfInterval(second_firefox_start(),
                                           SessionStoreWindowRestored(),
                                           write_counter,
                                           output=structured_output)
            etl.add_attr(write_interval)

            etl.analyze()

    main()
1156