# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import json
import os
import pathlib
import statistics
import sys

import jsonschema

from mozperftest.layers import Layer
from mozperftest.metrics.common import COMMON_ARGS, filtered_metrics
from mozperftest.metrics.exceptions import PerfherderValidDataError
from mozperftest.metrics.utils import is_number, write_json
from mozperftest.utils import strtobool


PERFHERDER_SCHEMA = pathlib.Path(
    "testing", "mozharness", "external_tools", "performance-artifact-schema.json"
)


class Perfherder(Layer):
    """Output data in the perfherder format."""

    name = "perfherder"
    activated = False

    # Copy the common arguments so that updating them here does not
    # mutate the shared COMMON_ARGS mapping used by other layers.
    arguments = dict(COMMON_ARGS)
    arguments.update(
        {
            "app": {
                "type": str,
                "default": "firefox",
                "choices": [
                    "firefox",
                    "chrome-m",
                    "chrome",
                    "chromium",
                    "fennec",
                    "geckoview",
                    "fenix",
                    "refbrow",
                ],
                "help": (
                    "Shorthand name of the application being tested "
                    "(used in the perfherder data)."
                ),
            },
            "stats": {
                "action": "store_true",
                "default": False,
                "help": "If set, browsertime statistics will be reported.",
            },
            "timestamp": {
                "type": float,
                "default": None,
                "help": (
                    "Timestamp to use for the perfherder data. Can be the "
                    "current date or a past date if needed."
                ),
            },
        }
    )

    def run(self, metadata):
        """Processes the given results into a perfherder-formatted data blob.

        If the `--perfherder` flag isn't provided, the results won't be
        processed into a perfherder-data blob. If the flavor is unknown
        to us, we assume that it comes from browsertime.

        XXX If needed, make a way to do flavor-specific processing

        :param metadata: The metadata object holding the results to
            process; the path of the data blob that gets written is
            recorded on it as an output.
        :return: The metadata object.
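
        A hypothetical sketch of the line this layer ends up writing to
        stdout once the blob is built (field names match the blob built
        below; the values are illustrative only)::

            PERFHERDER_DATA: {"framework": {"name": "mozperftest"},
            "application": {"name": "firefox"}, "suites": [{"name":
            "example", "value": 101.0, "unit": "ms", "subtests": [...]}]}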
80 """ 81 prefix = self.get_arg("prefix") 82 output = self.get_arg("output") 83 84 # XXX Make an arugment for exclusions from metrics 85 # (or go directly to regex's for metrics) 86 exclusions = None 87 if not self.get_arg("stats"): 88 exclusions = ["statistics."] 89 90 # Get filtered metrics 91 metrics = self.get_arg("metrics") 92 results, fullsettings = filtered_metrics( 93 metadata, 94 output, 95 prefix, 96 metrics=metrics, 97 transformer=self.get_arg("transformer"), 98 settings=True, 99 exclude=exclusions, 100 split_by=self.get_arg("split-by"), 101 simplify_names=self.get_arg("simplify-names"), 102 simplify_exclude=self.get_arg("simplify-exclude"), 103 ) 104 105 if not any([results[name] for name in results]): 106 self.warning("No results left after filtering") 107 return metadata 108 109 # XXX Add version info into this data 110 app_info = {"name": self.get_arg("app", default="firefox")} 111 112 # converting the metrics list into a mapping where 113 # keys are the metrics nane 114 if metrics is not None: 115 metrics = dict([(m["name"], m) for m in metrics]) 116 else: 117 metrics = {} 118 119 all_perfherder_data = None 120 for name, res in results.items(): 121 settings = dict(fullsettings[name]) 122 # updating the settings with values provided in metrics, if any 123 if name in metrics: 124 settings.update(metrics[name]) 125 126 # XXX Instead of just passing replicates here, we should build 127 # up a partial perfherder data blob (with options) and subtest 128 # overall values. 129 subtests = {} 130 for r in res: 131 vals = [v["value"] for v in r["data"] if is_number(v["value"])] 132 if vals: 133 subtests[r["subtest"]] = vals 134 135 perfherder_data = self._build_blob( 136 subtests, 137 name=name, 138 extra_options=settings.get("extraOptions"), 139 should_alert=strtobool(settings.get("shouldAlert", False)), 140 application=app_info, 141 alert_threshold=float(settings.get("alertThreshold", 2.0)), 142 lower_is_better=strtobool(settings.get("lowerIsBetter", True)), 143 unit=settings.get("unit", "ms"), 144 summary=settings.get("value"), 145 framework=settings.get("framework"), 146 metrics_info=metrics, 147 ) 148 149 if all_perfherder_data is None: 150 all_perfherder_data = perfherder_data 151 else: 152 all_perfherder_data["suites"].extend(perfherder_data["suites"]) 153 154 if prefix: 155 # If a prefix was given, store it in the perfherder data as well 156 all_perfherder_data["prefix"] = prefix 157 158 timestamp = self.get_arg("timestamp") 159 if timestamp is not None: 160 all_perfherder_data["pushTimestamp"] = timestamp 161 162 # Validate the final perfherder data blob 163 with pathlib.Path(metadata._mach_cmd.topsrcdir, PERFHERDER_SCHEMA).open() as f: 164 schema = json.load(f) 165 jsonschema.validate(all_perfherder_data, schema) 166 167 file = "perfherder-data.json" 168 if prefix: 169 file = "{}-{}".format(prefix, file) 170 self.info("Writing perfherder results to {}".format(os.path.join(output, file))) 171 172 # XXX "suites" key error occurs when using self.info so a print 173 # is being done for now. 

        # print() will produce a BlockingIOError on large outputs, so we
        # use sys.stdout directly instead
        sys.stdout.write("PERFHERDER_DATA: ")
        json.dump(all_perfherder_data, sys.stdout)
        sys.stdout.write("\n")
        sys.stdout.flush()

        metadata.set_output(write_json(all_perfherder_data, output, file))
        return metadata

    def _build_blob(
        self,
        subtests,
        name="browsertime",
        test_type="pageload",
        extra_options=None,
        should_alert=False,
        subtest_should_alert=None,
        suite_should_alert=False,
        framework=None,
        application=None,
        alert_threshold=2.0,
        lower_is_better=True,
        unit="ms",
        summary=None,
        metrics_info=None,
    ):
        """Build a PerfHerder data blob from the given subtests.

        NOTE: This is a WIP, see the many TODOs across this file.

        Given a dictionary of subtests and their values, build up a
        perfherder data blob. Note that the naming convention for these
        arguments is different from the rest of the scripts, to make it
        easier to see where they end up in the perfherder data.

        For the `should_alert` field, if `should_alert` is True but
        `subtest_should_alert` is empty, then all subtests along with the
        suite will generate alerts. Otherwise, if `subtest_should_alert`
        contains subtests to alert on, then only those will alert and
        nothing else (including the suite). If the suite value should
        alert in that case, set `suite_should_alert` to True.

        :param subtests dict: A dictionary of subtests and their values.
            XXX TODO items for subtests:
            (1) Allow it to contain replicates and individual settings
                for each of the subtests.
            (2) The mean of the replicates is taken for now, but it
                should be made more flexible in some way.
            (3) We need some way to handle making multiple suites.
        :param name str: Name to give to the suite.
        :param test_type str: The type of test that was run.
        :param extra_options list: A list of extra options to store.
        :param should_alert bool: Whether all values in the suite should
            generate alerts or not.
        :param subtest_should_alert list: A list of subtests to alert on.
            If this is not empty, then it will disable the suite-level
            alerts.
        :param suite_should_alert bool: Used if `subtest_should_alert` is
            not empty; if True, then the suite-level value will generate
            alerts.
        :param framework dict: Information about the framework that
            is being tested.
        :param application dict: Information about the application that
            is being tested. Must include a name, and optionally a version.
        :param alert_threshold float: The percentage change this metric
            must undergo to generate an alert.
        :param lower_is_better bool: If True, then lower values are better
            than higher ones.
        :param unit str: The unit of the data.
        :param summary float: The summary value to use in the perfherder
            data blob. By default, the mean of all the subtest values
            will be used.
        :param metrics_info dict: A mapping of metric names to metric
            specifications; their settings (unit, alert options, extra
            options) override the defaults for matching subtests.

        :return dict: The PerfHerder data blob.
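
        A hypothetical sketch of the input and the resulting blob shape,
        with illustrative names and values (not from a real run)::

            blob = self._build_blob({"firstPaint": [100.0, 102.0]})
            blob["suites"][0]["value"]  # 101.0, the mean of all replicates
            blob["suites"][0]["subtests"][0]["replicates"]  # [100.0, 102.0]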
249 """ 250 if extra_options is None: 251 extra_options = [] 252 if subtest_should_alert is None: 253 subtest_should_alert = [] 254 if framework is None: 255 framework = {"name": "mozperftest"} 256 if application is None: 257 application = {"name": "firefox", "version": "9000"} 258 if metrics_info is None: 259 metrics_info = {} 260 261 perf_subtests = [] 262 suite = { 263 "name": name, 264 "type": test_type, 265 "value": None, 266 "unit": unit, 267 "extraOptions": extra_options, 268 "lowerIsBetter": lower_is_better, 269 "alertThreshold": alert_threshold, 270 "shouldAlert": (should_alert and not subtest_should_alert) 271 or suiteshould_alert, 272 "subtests": perf_subtests, 273 } 274 275 perfherder = { 276 "suites": [suite], 277 "framework": framework, 278 "application": application, 279 } 280 281 allvals = [] 282 alert_thresholds = [] 283 for measurement in subtests: 284 reps = subtests[measurement] 285 allvals.extend(reps) 286 287 if len(reps) == 0: 288 self.warning("No replicates found for {}, skipping".format(measurement)) 289 continue 290 291 # Gather extra settings specified from within a metric specification 292 subtest_lower_is_better = lower_is_better 293 subtest_unit = unit 294 for met in metrics_info: 295 if met not in measurement: 296 continue 297 298 extra_options.extend(metrics_info[met].get("extraOptions", [])) 299 alert_thresholds.append( 300 metrics_info[met].get("alertThreshold", alert_threshold) 301 ) 302 303 subtest_unit = metrics_info[met].get("unit", unit) 304 subtest_lower_is_better = metrics_info[met].get( 305 "lowerIsBetter", lower_is_better 306 ) 307 308 if metrics_info[met].get("shouldAlert", should_alert): 309 subtest_should_alert.append(measurement) 310 311 break 312 313 perf_subtests.append( 314 { 315 "name": measurement, 316 "replicates": reps, 317 "lowerIsBetter": subtest_lower_is_better, 318 "value": statistics.mean(reps), 319 "unit": subtest_unit, 320 "shouldAlert": should_alert or measurement in subtest_should_alert, 321 } 322 ) 323 324 if len(allvals) == 0: 325 raise PerfherderValidDataError( 326 "Could not build perfherder data blob because no valid data was provided, " 327 + "only int/float data is accepted." 328 ) 329 330 alert_thresholds = list(set(alert_thresholds)) 331 if len(alert_thresholds) > 1: 332 raise PerfherderValidDataError( 333 "Too many alertThreshold's were specified, expecting 1 but found " 334 + f"{len(alert_thresholds)}" 335 ) 336 elif len(alert_thresholds) == 1: 337 suite["alertThreshold"] = alert_thresholds[0] 338 339 suite["extraOptions"] = list(set(suite["extraOptions"])) 340 suite["value"] = statistics.mean(allvals) 341 return perfherder 342