1#!/usr/bin/env python
2
3# This Source Code Form is subject to the terms of the Mozilla Public
4# License, v. 2.0. If a copy of the MPL was not distributed with this
5# file, You can obtain one at http://mozilla.org/MPL/2.0/.
6
7"""
8objects and methods for parsing and serializing Talos results
9see https://wiki.mozilla.org/Buildbot/Talos/DataFormat
10"""
11from __future__ import absolute_import, print_function
12
13import csv
14import json
15import os
16import re
17import six
18
19from talos import output, utils, filter
20
21
class TalosResults(object):
    """Container class for Talos results across all tests in a run"""

    def __init__(self):
        # one TestResults instance per test
        self.results = []
        # extra options to accompany the serialized results
        self.extra_options = []

    def add(self, test_results):
        """append a TestResults instance for one test"""
        self.results.append(test_results)

    def add_extra_option(self, extra_option):
        """record an extra option to accompany the serialized results"""
        self.extra_options.append(extra_option)

    def output(self, output_formats):
        """
        output all results to appropriate URLs
        - output_formats: a dict mapping formats to a list of URLs
        """
        try:

            for _format, urls in output_formats.items():
                _output = output.Output(self, Results)
                results = _output()
                for url in urls:
                    _output.output(results, url)
        except utils.TalosError as e:
            # best effort: also dump the results to a local results.out file
            # so there is a record of what failed to upload
            try:
                _output = output.GraphserverOutput(self)
                results = _output()
                _output.output(
                    "file://%s" % os.path.join(os.getcwd(), "results.out"), results
                )
            except Exception:
                # failure to write the fallback file must not mask the
                # original upload error
                pass
            print("\nFAIL: %s" % str(e).replace("\n", "\nRETURN:"))
            # bare raise preserves the original traceback (a `raise e` would
            # re-raise from this frame instead)
            raise
59
60
class TestResults(object):
    """container object for all test results across cycles"""

    def __init__(self, test_config, global_counters=None, framework=None):
        # one parsed Results instance per cycle
        self.results = []
        self.test_config = test_config
        # results format ('tsformat' or 'tpformat'); set on the first add()
        self.format = None
        self.global_counters = global_counters or {}
        self.all_counter_results = []
        self.framework = framework
        # set to True if xperf counters were recorded for any cycle
        self.using_xperf = False

    def name(self):
        """return the test name from the test configuration"""
        return self.test_config["name"]

    def mainthread(self):
        """return the mainthread setting from the test configuration"""
        return self.test_config["mainthread"]

    def add(self, results, counter_results=None):
        """
        accumulate one cycle of results
        - results : raw browser log output for this cycle, parsed into a
          Results subclass via BrowserLogResults
        - counter_results : counters accumulated for this cycle
        """

        # convert to a results class via parsing the browser log
        format_pagename = bool(self.test_config["format_pagename"])

        browserLog = BrowserLogResults(
            results,
            format_pagename=format_pagename,
            counter_results=counter_results,
            global_counters=self.global_counters,
        )
        results = browserLog.results()

        self.using_xperf = browserLog.using_xperf
        # ensure the results format matches previous results
        if self.results:
            if results.format != self.results[0].format:
                raise utils.TalosError("Conflicting formats for results")
        else:
            self.format = results.format

        self.results.append(results)

        if counter_results:
            self.all_counter_results.append(counter_results)
111
112
class Results(object):
    def filter(self, testname, filters):
        """
        filter the results set;
        applies each of the filters in order to the results data
        filters should be callables that take a list
        the last filter should return a scalar (float or int)
        returns a list of [[data, page], ...]
        """
        filtered = []
        for entry in self.results:
            page = entry["page"]
            values = entry["runs"]

            # partition the filters: ignore* filters prune the raw data
            # first; everything else summarizes it afterwards
            summarizers = []
            for f in filters:
                if f.func.__name__.startswith("ignore"):
                    values = f.apply(values)
                else:
                    summarizers.append(f)

            # apply the summarization filters
            for f in summarizers:
                if f.func.__name__ == "v8_subtest":
                    # v8_subtest needs the page name to look up reference data
                    values = filter.v8_subtest(values, page)
                else:
                    values = f.apply(values)

            summary = {
                "filtered": values,  # backward compatibility with perfherder
                "value": values,
            }
            filtered.append([summary, page])

        return filtered

    def raw_values(self):
        """return unfiltered (page, runs) pairs for every result"""
        pairs = []
        for entry in self.results:
            pairs.append((entry["page"], entry["runs"]))
        return pairs

    def values(self, testname, filters):
        """return filtered (value, page) for each value"""
        kept = []
        for val, page in self.filter(testname, filters):
            if val["filtered"] > -1:
                kept.append([val, page])
        return kept
162
163
class TsResults(Results):
    """
    results for Ts tests
    """

    format = "tsformat"

    def __init__(self, string, counter_results=None, format_pagename=True):
        self.counter_results = counter_results

        string = string.strip()
        lines = string.splitlines()

        # gather the data
        self.results = []
        index = 0

        # Case where one test iteration may report multiple event values
        # (i.e. ts_paint): the payload is a single JSON object mapping
        # event labels to values
        if string.startswith("{"):
            parsed = json.loads(string)
            runs = {str(label): [value] for label, value in parsed.items()}
            self.results.append({"runs": runs, "index": index, "page": "NULL"})

        # Case where we support a pagename in the results:
        # each line is "pagename,raw,raw,..."
        if not self.results:
            for line in lines:
                fields = [field for field in line.strip().split(",") if field]
                if len(fields) <= 1:
                    # need at least a pagename plus one raw value
                    continue
                # note: with len(fields) > 1 we have pagename,raw_results
                self.results.append(
                    {
                        "index": index,
                        "page": fields[0],
                        "runs": [float(field) for field in fields[1:]],
                    }
                )
                index += 1

        # Original case where we just have numbers and no pagename,
        # separated by '|'
        if not self.results:
            self.results.append(
                {
                    "index": index,
                    "page": "NULL",
                    "runs": [float(val) for val in string.split("|")],
                }
            )
214
215
class PageloaderResults(Results):
    """
    results from a browser_dump snippet
    https://wiki.mozilla.org/Buildbot/Talos/DataFormat#browser_output.txt
    """

    format = "tpformat"

    def __init__(self, string, counter_results=None, format_pagename=True):
        """
        - string : string of relevent part of browser dump
        - counter_results : counter results dictionary
        """

        self.counter_results = counter_results

        lines = string.strip().splitlines()

        # currently we ignore the metadata on top of the output (e.g.):
        # _x_x_mozilla_page_load
        # _x_x_mozilla_page_load_details
        # |i|pagename|runs|
        data_lines = [line for line in lines if ";" in line]

        # gather the data
        self.results = []
        carry = ""
        for line in data_lines:
            # Bug 1562883 - Determine what is causing a single line to get
            # written on multiple lines.
            if carry:
                line = carry + line
                carry = ""

            fields = [field for field in line.strip("|").split(";") if field]

            if len(fields) <= 2:
                # incomplete record: stash it and glue the next line on
                carry = line
                continue

            index = int(fields[0])
            page = fields[1]
            runs = [float(field) for field in fields[2:]]

            # fix up page
            if format_pagename:
                page = self.format_pagename(page)

            self.results.append({"index": index, "page": page, "runs": runs})

    def format_pagename(self, page):
        """
        fix up the page for reporting
        """
        page = page.rstrip("/")
        if "/" not in page:
            return page
        if "base_page" in page or "ref_page" in page:
            # for base vs ref type test, the page name is different format, i.e.
            # base_page_1_http://localhost:53309/tests/perf-reftest/bloom-basic.html
            return page.split("/")[-1]
        return page.split("/")[0]
283
284
class BrowserLogResults(object):
    """parse the results from the browser log output"""

    # tokens for the report types: (format name, (start_token, end_token));
    # the format name selects the Results subclass in `classes` below
    report_tokens = [
        ("tsformat", ("__start_report", "__end_report")),
        ("tpformat", ("__start_tp_report", "__end_tp_report")),
    ]

    # tokens for timestamps, in order (attribute, (start_delimeter,
    # end_delimter))
    time_tokens = [
        ("startTime", ("__startTimestamp", "__endTimestamp")),
        (
            "beforeLaunchTime",
            ("__startBeforeLaunchTimestamp", "__endBeforeLaunchTimestamp"),
        ),
        (
            "endTime",
            ("__startAfterTerminationTimestamp", "__endAfterTerminationTimestamp"),
        ),
    ]

    # regular expression for failure case if we can't parse the tokens
    RESULTS_REGEX_FAIL = re.compile("__FAIL(.*?)__FAIL", re.DOTALL | re.MULTILINE)

    # regular expression for responsiveness results
    RESULTS_RESPONSIVENESS_REGEX = re.compile(
        "MOZ_EVENT_TRACE\ssample\s\d*?\s(\d*\.?\d*)$", re.DOTALL | re.MULTILINE
    )

    # classes for results types
    classes = {"tsformat": TsResults, "tpformat": PageloaderResults}

    # If we are using xperf, we do not upload the regular results, only
    # xperf counters
    using_xperf = False

    def __init__(
        self,
        results_raw,
        format_pagename=True,
        counter_results=None,
        global_counters=None,
    ):
        """
        - results_raw : string contents of the browser log to parse
        - format_pagename : whether page names are normalized when the
          per-format Results class is built (see results())
        - counter_results : per-test counter dictionary to accumulate into
        - global_counters : cross-test counter dictionary to accumulate into
        """

        self.counter_results = counter_results
        self.global_counters = global_counters
        self.format_pagename = format_pagename
        self.results_raw = results_raw

        # parse the results
        try:
            # an explicit __FAIL...__FAIL marker in the log takes precedence
            # over any report tokens
            match = self.RESULTS_REGEX_FAIL.search(self.results_raw)
            if match:
                self.error(match.group(1))
                raise utils.TalosError(match.group(1))

            self.parse()
        except utils.TalosError:
            # TODO: consider investigating this further or adding additional
            # information
            raise  # reraise failing exception

        # accumulate counter results
        self.counters(self.counter_results, self.global_counters)

    def error(self, message):
        """raise a TalosError for bad parsing of the browser log"""
        raise utils.TalosError(message)

    def parse(self):
        """
        extract the report body and the timestamps from the raw log.

        Sets self.browser_results, self.format, and one attribute per entry
        in time_tokens (startTime, beforeLaunchTime, endTime). Raises
        TalosError (via self.error) on any missing/misordered token.
        """
        # character position of the most recently matched token pair; used
        # to verify that tokens appear in the expected order in the log
        position = -1

        # parse the report: the first report-token pair found wins
        for format, tokens in self.report_tokens:
            report, position = self.get_single_token(*tokens)
            if report is None:
                continue
            self.browser_results = report
            self.format = format
            previous_tokens = tokens
            break
        else:
            # for-else: no report-token pair matched at all
            self.error(
                "Could not find report in browser output: %s" % self.report_tokens
            )

        # parse the timestamps
        for attr, tokens in self.time_tokens:

            # parse the token contents
            value, _last_token = self.get_single_token(*tokens)

            # check for errors
            if not value:
                self.error(
                    "Could not find %s in browser output: (tokens: %s)" % (attr, tokens)
                )
            try:
                # timestamps may be printed as floats; truncate to int
                value = int(float(value))
            except ValueError:
                self.error("Could not cast %s to an integer: %s" % (attr, value))
            if _last_token < position:
                # timestamps must appear after the report and after each
                # other, in time_tokens order
                self.error(
                    "%s [character position: %s] found before %s"
                    " [character position: %s]"
                    % (tokens, _last_token, previous_tokens, position)
                )

            # process
            setattr(self, attr, value)
            position = _last_token
            previous_tokens = tokens

    def get_single_token(self, start_token, end_token):
        """browser logs should only have a single instance of token pairs

        Returns (contents, end_position), or (None, -1) when the pair is
        absent; errors out on multiple matches.
        """
        try:
            parts, last_token = utils.tokenize(self.results_raw, start_token, end_token)
        except AssertionError as e:
            self.error(str(e))
        if not parts:
            return None, -1  # no match
        if len(parts) != 1:
            self.error("Multiple matches for %s,%s" % (start_token, end_token))
        return parts[0], last_token

    def results(self):
        """return results instance appropriate to the format detected"""

        if self.format not in self.classes:
            raise utils.TalosError(
                "Unable to find a results class for format: %s" % repr(self.format)
            )

        return self.classes[self.format](
            self.browser_results, format_pagename=self.format_pagename
        )

    # methods for counters

    def counters(self, counter_results=None, global_counters=None):
        """accumulate all counters

        Only global_counters is consulted here; counter_results is passed
        through untouched (xperf counters accrue into global_counters).
        """

        if global_counters is not None:
            if "responsiveness" in global_counters:
                global_counters["responsiveness"].extend(self.responsiveness())
            self.xperf(global_counters)

    def xperf(self, counter_results):
        """record xperf counters in counter_results dictionary

        Reads the CSV files produced by the xperf ETL post-processing
        (thread stats, mainthread IO, session restore) from the current
        working directory and appends values for any counter names already
        present in counter_results. Sets self.using_xperf when any xperf
        counter is actually recorded.
        """

        session_store_counter = "time_to_session_store_window_restored_ms"

        counters = [
            "main_startup_fileio",
            "main_startup_netio",
            "main_normal_fileio",
            "main_normal_netio",
            "nonmain_startup_fileio",
            "nonmain_normal_fileio",
            "nonmain_normal_netio",
            session_store_counter,
        ]

        mainthread_counter_keys = ["readcount", "readbytes", "writecount", "writebytes"]
        mainthread_counters = [
            "_".join(["mainthread", counter_key])
            for counter_key in mainthread_counter_keys
        ]

        self.mainthread_io(counter_results)

        if (
            not set(counters)
            .union(set(mainthread_counters))
            .intersection(counter_results.keys())
        ):
            # no xperf counters to accumulate
            return

        filename = "etl_output_thread_stats.csv"
        if not os.path.exists(filename):
            raise utils.TalosError(
                "Error: we are looking for xperf results file %s,"
                " and didn't find it" % filename
            )

        contents = open(filename).read()
        lines = contents.splitlines()
        reader = csv.reader(lines)
        header = None
        for row in reader:
            # Read CSV
            row = [i.strip() for i in row]
            if not header:
                # We are assuming the first row is the header and all other
                # data is counters
                header = row
                continue
            values = dict(six.moves.zip(header, row))

            # Format for talos: e.g. thread=main, stage=startup,
            # counter=file_io_bytes -> "main_startup_fileio"
            thread = values["thread"]
            counter = values["counter"].rsplit("_io_bytes", 1)[0]
            counter_name = "%s_%s_%sio" % (thread, values["stage"], counter)
            value = float(values["value"])

            # Accrue counter
            # NOTE(review): the membership check makes setdefault redundant
            # here, but it also means only pre-registered counters accrue
            if counter_name in counter_results:
                counter_results.setdefault(counter_name, []).append(value)
                self.using_xperf = True

        # mainthread IO per-file breakdown, from a separate ETL output file
        if set(mainthread_counters).intersection(counter_results.keys()):
            filename = "etl_output.csv"
            if not os.path.exists(filename):
                raise utils.TalosError(
                    "Error: we are looking for xperf results file"
                    " %s, and didn't find it" % filename
                )

            contents = open(filename).read()
            lines = contents.splitlines()
            reader = csv.reader(lines)
            header = None
            for row in reader:
                row = [i.strip() for i in row]
                if not header:
                    # We are assuming the first row is the header and all
                    # other data is counters
                    header = row
                    continue
                values = dict(six.moves.zip(header, row))
                for i, mainthread_counter in enumerate(mainthread_counters):
                    # record [count, filename] pairs for nonzero IO only
                    if int(values[mainthread_counter_keys[i]]) > 0:
                        counter_results.setdefault(mainthread_counter, []).append(
                            [
                                int(values[mainthread_counter_keys[i]]),
                                values["filename"],
                            ]
                        )

        if session_store_counter in counter_results.keys():
            filename = "etl_output_session_restore_stats.csv"
            # This file is a csv but it only contains one field, so we'll just
            # obtain the value by converting the second line in the file.
            with open(filename, "r") as contents:
                lines = contents.read().splitlines()
                value = float(lines[1].strip())
                counter_results.setdefault(session_store_counter, []).append(value)

    def mainthread_io(self, counter_results):
        """record mainthread IO counters in counter_results dictionary"""

        # we want to measure mtio on xperf runs.
        # this will be shoved into the xperf results as we ignore those
        SCRIPT_DIR = os.path.abspath(os.path.realpath(os.path.dirname(__file__)))
        filename = os.path.join(SCRIPT_DIR, "mainthread_io.json")
        try:
            contents = open(filename).read()
            counter_results.setdefault("mainthreadio", []).append(contents)
            self.using_xperf = True
        except Exception:
            # silent failure is fine here as we will only see this on tp5n runs
            pass

    def responsiveness(self):
        """return all MOZ_EVENT_TRACE sample values found in the raw log"""
        return self.RESULTS_RESPONSIVENESS_REGEX.findall(self.results_raw)
556