# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import json
import jsonschema
import os
import pathlib
import statistics
import sys

from mozperftest.utils import strtobool
from mozperftest.layers import Layer
from mozperftest.metrics.exceptions import PerfherderValidDataError
from mozperftest.metrics.common import filtered_metrics, COMMON_ARGS
from mozperftest.metrics.utils import write_json, is_number


PERFHERDER_SCHEMA = pathlib.Path(
    "testing", "mozharness", "external_tools", "performance-artifact-schema.json"
)
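# The schema above lives in-tree (relative to topsrcdir) and is loaded at the
# end of `run()` to validate the final perfherder data blob before it is
# written out.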


class Perfherder(Layer):
    """Output data in the perfherder format."""

    name = "perfherder"
    activated = False

    arguments = COMMON_ARGS
    arguments.update(
        {
            "app": {
                "type": str,
                "default": "firefox",
                "choices": [
                    "firefox",
                    "chrome-m",
                    "chrome",
                    "chromium",
                    "fennec",
                    "geckoview",
                    "fenix",
                    "refbrow",
                ],
                "help": (
                    "Shorthand name of application that is "
                    "being tested (used in perfherder data)."
                ),
            },
            "stats": {
                "action": "store_true",
                "default": False,
                "help": "If set, browsertime statistics will be reported.",
            },
            "timestamp": {
                "type": float,
                "default": None,
                "help": (
                    "Timestamp to use for the perfherder data. Can be the "
                    "current date or a past date if needed."
                ),
            },
        }
    )

    def run(self, metadata):
        """Processes the given results into a perfherder-formatted data blob.

        If the `--perfherder` flag isn't provided, then the
        results won't be processed into a perfherder-data blob. If the
        flavor is unknown to us, then we assume that it comes from
        browsertime.

        XXX If needed, make a way to do flavor-specific processing

        :param results list/dict/str: Results to process.
        :param perfherder bool: True if results should be processed
            into a perfherder-data blob.
        :param flavor str: The flavor that is being processed.
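
        A rough sketch of what ends up on stdout (illustrative only; the
        exact fields are assembled by ``_build_blob`` below)::

            PERFHERDER_DATA: {"framework": {"name": "mozperftest"},
                              "application": {"name": "firefox"},
                              "suites": [{"name": "...", "subtests": [...]}]}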
        """
        prefix = self.get_arg("prefix")
        output = self.get_arg("output")

        # XXX Make an argument for exclusions from metrics
        # (or go directly to regexes for metrics)
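        # Without --stats, any "statistics." metric is excluded from the
        # perfherder output (see the "stats" argument above).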
        exclusions = None
        if not self.get_arg("stats"):
            exclusions = ["statistics."]

        # Get filtered metrics
        metrics = self.get_arg("metrics")
        results, fullsettings = filtered_metrics(
            metadata,
            output,
            prefix,
            metrics=metrics,
            transformer=self.get_arg("transformer"),
            settings=True,
            exclude=exclusions,
            split_by=self.get_arg("split-by"),
            simplify_names=self.get_arg("simplify-names"),
            simplify_exclude=self.get_arg("simplify-exclude"),
        )

        if not any([results[name] for name in results]):
            self.warning("No results left after filtering")
            return metadata

        # XXX Add version info into this data
        app_info = {"name": self.get_arg("app", default="firefox")}

        # Convert the metrics list into a mapping keyed by metric name
        if metrics is not None:
            metrics = dict([(m["name"], m) for m in metrics])
        else:
            metrics = {}

        all_perfherder_data = None
        for name, res in results.items():
            settings = dict(fullsettings[name])
            # Update the settings with values provided in metrics, if any
            if name in metrics:
                settings.update(metrics[name])

            # XXX Instead of just passing replicates here, we should build
            # up a partial perfherder data blob (with options) and subtest
            # overall values.
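            # Build a mapping of subtest name -> list of numeric replicate
            # values (non-numeric values are dropped by is_number), e.g.
            # {"fcp": [100.0, 102.5]}.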
            subtests = {}
            for r in res:
                vals = [v["value"] for v in r["data"] if is_number(v["value"])]
                if vals:
                    subtests[r["subtest"]] = vals

            perfherder_data = self._build_blob(
                subtests,
                name=name,
                extra_options=settings.get("extraOptions"),
                should_alert=strtobool(settings.get("shouldAlert", False)),
                application=app_info,
                alert_threshold=float(settings.get("alertThreshold", 2.0)),
                lower_is_better=strtobool(settings.get("lowerIsBetter", True)),
                unit=settings.get("unit", "ms"),
                summary=settings.get("value"),
                framework=settings.get("framework"),
                metrics_info=metrics,
            )

            if all_perfherder_data is None:
                all_perfherder_data = perfherder_data
            else:
                all_perfherder_data["suites"].extend(perfherder_data["suites"])

        if prefix:
            # If a prefix was given, store it in the perfherder data as well
            all_perfherder_data["prefix"] = prefix

        timestamp = self.get_arg("timestamp")
        if timestamp is not None:
            all_perfherder_data["pushTimestamp"] = timestamp

        # Validate the final perfherder data blob
        with pathlib.Path(metadata._mach_cmd.topsrcdir, PERFHERDER_SCHEMA).open() as f:
            schema = json.load(f)
        jsonschema.validate(all_perfherder_data, schema)

        file = "perfherder-data.json"
        if prefix:
            file = "{}-{}".format(prefix, file)
        self.info("Writing perfherder results to {}".format(os.path.join(output, file)))

        # XXX "suites" key error occurs when using self.info so a print
        # is being done for now.

        # print() will produce a BlockingIOError on large outputs, so we use
        # sys.stdout
        sys.stdout.write("PERFHERDER_DATA: ")
        json.dump(all_perfherder_data, sys.stdout)
        sys.stdout.write("\n")
        sys.stdout.flush()

        metadata.set_output(write_json(all_perfherder_data, output, file))
        return metadata

    def _build_blob(
        self,
        subtests,
        name="browsertime",
        test_type="pageload",
        extra_options=None,
        should_alert=False,
        subtest_should_alert=None,
        suiteshould_alert=False,
        framework=None,
        application=None,
        alert_threshold=2.0,
        lower_is_better=True,
        unit="ms",
        summary=None,
        metrics_info=None,
    ):
        """Build a PerfHerder data blob from the given subtests.

        NOTE: This is a WIP, see the many TODOs across this file.

        Given a dictionary of subtests and their values, build up a
        perfherder data blob. Note that the naming convention for
        these arguments is different from the rest of the scripts
        to make it easier to see where they end up in the perfherder
        data.

        For the `should_alert` field, if `should_alert` is True but
        `subtest_should_alert` is empty, then all subtests along with the
        suite will generate alerts. Otherwise, if `subtest_should_alert`
        contains subtests to alert on, then only those will alert and nothing
        else (including the suite). If the suite value should alert, then set
        `suiteshould_alert` to True.

        :param subtests dict: A dictionary of subtests and the values.
            XXX TODO items for subtests:
                (1) Allow it to contain replicates and individual settings
                    for each of the subtests.
                (2) The mean of the replicates is taken for now,
                    but it should be made more flexible in some way.
                (3) We need some way to handle making multiple suites.
        :param name str: Name to give to the suite.
        :param test_type str: The type of test that was run.
        :param extra_options list: A list of extra options to store.
        :param should_alert bool: Whether all values in the suite should
            generate alerts or not.
        :param subtest_should_alert list: A list of subtests to alert on. If this
            is not empty, then it will disable the suite-level alerts.
        :param suiteshould_alert bool: Used if `subtest_should_alert` is not
            empty, and if True, then the suite-level value will generate
            alerts.
        :param framework dict: Information about the framework that
            is being tested.
        :param application dict: Information about the application that
            is being tested. Must include name, and optionally a version.
        :param alert_threshold float: The change in percentage this
            metric must undergo to generate an alert.
        :param lower_is_better bool: If True, then lower values are better
            than higher ones.
        :param unit str: The unit of the data.
        :param summary float: The summary value to use in the perfherder
            data blob. By default, the mean of all the subtests will be
            used.
        :param metrics_info dict: Metric specifications (keyed by metric name)
            used to override settings for matching subtests.

        :return dict: The PerfHerder data blob.
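
        As an illustration only (values are made up), calling this with
        ``subtests={"fcp": [100, 110]}`` and the defaults would produce a
        blob shaped roughly like::

            {"framework": {"name": "mozperftest"},
             "application": {"name": "firefox", "version": "9000"},
             "suites": [{"name": "browsertime", "type": "pageload",
                         "value": 105, "unit": "ms", ...,
                         "subtests": [{"name": "fcp", "replicates": [100, 110],
                                       "value": 105, "unit": "ms", ...}]}]}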
        """
        if extra_options is None:
            extra_options = []
        if subtest_should_alert is None:
            subtest_should_alert = []
        if framework is None:
            framework = {"name": "mozperftest"}
        if application is None:
            application = {"name": "firefox", "version": "9000"}
        if metrics_info is None:
            metrics_info = {}

        perf_subtests = []
        suite = {
            "name": name,
            "type": test_type,
            "value": None,
            "unit": unit,
            "extraOptions": extra_options,
            "lowerIsBetter": lower_is_better,
            "alertThreshold": alert_threshold,
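            # Alert on the whole suite only when should_alert is set and no
            # specific subtests were requested; suiteshould_alert can force a
            # suite-level alert regardless (see the docstring above).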
            "shouldAlert": (should_alert and not subtest_should_alert)
            or suiteshould_alert,
            "subtests": perf_subtests,
        }

        perfherder = {
            "suites": [suite],
            "framework": framework,
            "application": application,
        }

        allvals = []
        alert_thresholds = []
        for measurement in subtests:
            reps = subtests[measurement]
            allvals.extend(reps)

            if len(reps) == 0:
                self.warning("No replicates found for {}, skipping".format(measurement))
                continue

            # Gather extra settings specified from within a metric specification
            subtest_lower_is_better = lower_is_better
            subtest_unit = unit
            for met in metrics_info:
                if met not in measurement:
                    continue

                extra_options.extend(metrics_info[met].get("extraOptions", []))
                alert_thresholds.append(
                    metrics_info[met].get("alertThreshold", alert_threshold)
                )

                subtest_unit = metrics_info[met].get("unit", unit)
                subtest_lower_is_better = metrics_info[met].get(
                    "lowerIsBetter", lower_is_better
                )

                if metrics_info[met].get("shouldAlert", should_alert):
                    subtest_should_alert.append(measurement)

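                # Only the first matching metric specification is applied
                # to a given measurement.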
                break

            perf_subtests.append(
                {
                    "name": measurement,
                    "replicates": reps,
                    "lowerIsBetter": subtest_lower_is_better,
                    "value": statistics.mean(reps),
                    "unit": subtest_unit,
                    "shouldAlert": should_alert or measurement in subtest_should_alert,
                }
            )

        if len(allvals) == 0:
            raise PerfherderValidDataError(
                "Could not build perfherder data blob because no valid data was provided, "
                + "only int/float data is accepted."
            )

        alert_thresholds = list(set(alert_thresholds))
        if len(alert_thresholds) > 1:
            raise PerfherderValidDataError(
                "Too many alertThresholds were specified, expecting 1 but found "
                + f"{len(alert_thresholds)}"
            )
        elif len(alert_thresholds) == 1:
            suite["alertThreshold"] = alert_thresholds[0]

        suite["extraOptions"] = list(set(suite["extraOptions"]))
        # Use the provided summary value when one was given; otherwise fall
        # back to the mean of all subtest replicates.
        if summary is not None:
            suite["value"] = summary
        else:
            suite["value"] = statistics.mean(allvals)
        return perfherder