1# -*- coding: utf-8 -*- 2 3# This Source Code Form is subject to the terms of the Mozilla Public 4# License, v. 2.0. If a copy of the MPL was not distributed with this 5# file, You can obtain one at http://mozilla.org/MPL/2.0/. 6 7from collections import OrderedDict 8import datetime 9import functools 10import json 11from pathlib import Path 12import sys 13import textwrap 14from typing import Any, Callable, Iterable, Sequence, Tuple, Union 15import urllib.request 16 17import appdirs # type: ignore 18import diskcache # type: ignore 19import jinja2 20import jsonschema # type: ignore 21from jsonschema import _utils # type: ignore 22import yaml 23 24if sys.version_info < (3, 7): 25 import iso8601 # type: ignore 26 27 28TESTING_MODE = "pytest" in sys.modules 29 30 31JSONType = Union[list, dict, str, int, float, None] 32""" 33The types supported by JSON. 34 35This is only an approximation -- this should really be a recursive type. 36""" 37 38# Adapted from 39# https://stackoverflow.com/questions/34667108/ignore-dates-and-times-while-parsing-yaml 40 41 42class _NoDatesSafeLoader(yaml.SafeLoader): 43 @classmethod 44 def remove_implicit_resolver(cls, tag_to_remove): 45 """ 46 Remove implicit resolvers for a particular tag 47 48 Takes care not to modify resolvers in super classes. 49 50 We want to load datetimes as strings, not dates, because we 51 go on to serialise as json which doesn't have the advanced types 52 of yaml, and leads to incompatibilities down the track. 53 """ 54 if "yaml_implicit_resolvers" not in cls.__dict__: 55 cls.yaml_implicit_resolvers = cls.yaml_implicit_resolvers.copy() 56 57 for first_letter, mappings in cls.yaml_implicit_resolvers.items(): 58 cls.yaml_implicit_resolvers[first_letter] = [ 59 (tag, regexp) for tag, regexp in mappings if tag != tag_to_remove 60 ] 61 62 63# Since we use JSON schema to validate, and JSON schema doesn't support 64# datetimes, we don't want the YAML loader to give us datetimes -- just 65# strings. 
_NoDatesSafeLoader.remove_implicit_resolver("tag:yaml.org,2002:timestamp")


def yaml_load(stream):
    """
    Map line number to yaml nodes, and preserve the order
    of metrics as they appear in the metrics.yaml file.

    :param stream: An open file object or a string of YAML content.
    :return: The parsed tree; every mapping additionally carries a
        ``defined_in`` attribute recording the (0-based) line it started on.
    """

    class SafeLineLoader(_NoDatesSafeLoader):
        pass

    def _construct_mapping_adding_line(loader, node):
        loader.flatten_mapping(node)
        mapping = OrderedDict(loader.construct_pairs(node))
        # Record the source line so validation errors can point back at it.
        mapping.defined_in = {"line": node.start_mark.line}
        return mapping

    SafeLineLoader.add_constructor(
        yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, _construct_mapping_adding_line
    )
    return yaml.load(stream, SafeLineLoader)


def ordered_yaml_dump(data, **kwargs):
    """
    Serialize ``data`` to YAML, emitting ``OrderedDict`` values as plain
    mappings (in insertion order) rather than as python/object tags.

    :param data: The object tree to serialize.
    :param kwargs: Extra keyword arguments passed through to ``yaml.dump``.
    """

    class OrderedDumper(yaml.Dumper):
        pass

    def _dict_representer(dumper, data):
        return dumper.represent_mapping(
            yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, data.items()
        )

    OrderedDumper.add_representer(OrderedDict, _dict_representer)
    return yaml.dump(data, Dumper=OrderedDumper, **kwargs)


def load_yaml_or_json(path: Path):
    """
    Load the content from either a .json or .yaml file, based on the filename
    extension.

    :param path: `pathlib.Path` object
    :rtype object: The tree of objects as a result of parsing the file.
    :raises ValueError: The file is neither a .json, .yml or .yaml file.
    :raises FileNotFoundError: The file does not exist.
    """
    # If in py.test, support bits of literal JSON/YAML content
    if TESTING_MODE and isinstance(path, dict):
        return path

    if path.suffix == ".json":
        with path.open("r", encoding="utf-8") as fd:
            return json.load(fd)
    elif path.suffix in (".yml", ".yaml", ".yamlx"):
        with path.open("r", encoding="utf-8") as fd:
            return yaml_load(fd)
    else:
        raise ValueError(f"Unknown file extension {path.suffix}")


def ensure_list(value: Any) -> Sequence[Any]:
    """
    Ensures that the value is a list.

    If it is anything but a list or tuple, a list with a single element
    containing only value is returned.
    """
    return value if isinstance(value, (list, tuple)) else [value]


def to_camel_case(input: str, capitalize_first_letter: bool) -> str:
    """
    Convert the value to camelCase.

    This additionally replaces any '.' with '_'. The first letter is capitalized
    depending on `capitalize_first_letter`.

    :param input: The string to convert.
    :param capitalize_first_letter: Whether the result starts upper-cased.
    """
    sanitized_input = input.replace(".", "_").replace("-", "_")
    # Filter out any empty token. This could happen due to leading '_' or
    # consecutive '__'.
    tokens = [s.capitalize() for s in sanitized_input.split("_") if len(s) != 0]
    # Guard against input made up entirely of separators (e.g. "_" or "--"),
    # which would otherwise raise an IndexError below.
    if not tokens:
        return ""
    # If we're not meant to capitalize the first letter, then lowercase it.
    if not capitalize_first_letter:
        tokens[0] = tokens[0].lower()
    # Finally join the tokens and capitalize.
    return "".join(tokens)


def camelize(value: str) -> str:
    """
    Convert the value to camelCase (with a lower case first letter).

    This is a thin wrapper around inflection.camelize that handles dots in
    addition to underscores.
    """
    return to_camel_case(value, False)


def Camelize(value: str) -> str:
    """
    Convert the value to CamelCase (with an upper case first letter).

    This is a thin wrapper around inflection.camelize that handles dots in
    addition to underscores.
    """
    return to_camel_case(value, True)


def snake_case(value: str) -> str:
    """
    Convert the value to snake_case.
    """
    return value.lower().replace(".", "_").replace("-", "_")


@functools.lru_cache()
def get_jinja2_template(
    template_name: str, filters: Iterable[Tuple[str, Callable]] = ()
):
    """
    Get a Jinja2 template that ships with glean_parser.

    The template has extra filters for camel-casing identifiers.

    :param template_name: Name of a file in ``glean_parser/templates``
    :param filters: tuple of 2-tuple. A tuple of (name, func) pairs defining
        additional filters.
    """
    env = jinja2.Environment(
        loader=jinja2.PackageLoader("glean_parser", "templates"),
        trim_blocks=True,
        lstrip_blocks=True,
    )

    env.filters["camelize"] = camelize
    env.filters["Camelize"] = Camelize
    for filter_name, filter_func in filters:
        env.filters[filter_name] = filter_func

    return env.get_template(template_name)


def keep_value(f):
    """
    Wrap a generator so the value it returns (rather than yields), will be
    accessible on the .value attribute when the generator is exhausted.
    """

    class ValueKeepingGenerator(object):
        def __init__(self, g):
            self.g = g
            self.value = None

        def __iter__(self):
            # ``yield from`` evaluates to the wrapped generator's return value.
            self.value = yield from self.g

    @functools.wraps(f)
    def g(*args, **kwargs):
        return ValueKeepingGenerator(f(*args, **kwargs))

    return g


def get_null_resolver(schema):
    """
    Returns a JSON Pointer resolver that does nothing.

    This lets us handle the moz: URLs in our schemas.
    """

    class NullResolver(jsonschema.RefResolver):
        def resolve_remote(self, uri):
            if uri in self.store:
                return self.store[uri]
            if uri == "":
                return self.referrer
            # Deliberately falls through (returning None) for any other URI:
            # remote references are never fetched.

    return NullResolver.from_schema(schema)


def fetch_remote_url(url: str, cache: bool = True):
    """
    Fetches the contents from an HTTP url or local file path, and optionally
    caches it to disk.

    Note: local files are returned as ``str``, while HTTP responses are
    returned (and cached) as raw ``bytes``.

    :param url: An ``http(s)://`` URL or a local file path.
    :param cache: If True, cache HTTP responses in the user cache directory.
    """
    # Include the Python version in the cache key, since caches aren't
    # sharable across Python versions.
    key = (url, str(sys.version_info))

    is_http = url.startswith("http")

    if not is_http:
        with open(url, "r", encoding="utf-8") as fd:
            return fd.read()

    if cache:
        cache_dir = appdirs.user_cache_dir("glean_parser", "mozilla")
        with diskcache.Cache(cache_dir) as dc:
            if key in dc:
                return dc[key]

    # `urlopen(...).read()` returns bytes (the original `str` annotation here
    # was incorrect).
    contents: bytes = urllib.request.urlopen(url).read()

    if cache:
        with diskcache.Cache(cache_dir) as dc:
            dc[key] = contents

    return contents


# Sentinel used to detect jsonschema errors missing essential attributes.
_unset = _utils.Unset()


def pprint_validation_error(error) -> str:
    """
    A version of jsonschema's ValidationError __str__ method that doesn't
    include the schema fragment that failed. This makes the error messages
    much more succinct.

    It also shows any subschemas of anyOf/allOf that failed, if any (what
    jsonschema calls "context").
    """
    essential_for_verbose = (
        error.validator,
        error.validator_value,
        error.instance,
        error.schema,
    )
    if any(m is _unset for m in essential_for_verbose):
        return textwrap.fill(error.message)

    # Rebuild just enough of the enclosing structure (walking the error path
    # outward) to show the failing node in context.
    instance = error.instance
    for path in list(error.relative_path)[::-1]:
        if isinstance(path, str):
            instance = {path: instance}
        else:
            instance = [instance]

    yaml_instance = ordered_yaml_dump(instance, width=72, default_flow_style=False)

    parts = ["```", yaml_instance.rstrip(), "```", "", textwrap.fill(error.message)]
    if error.context:
        parts.extend(
            textwrap.fill(x.message, initial_indent="    ", subsequent_indent="    ")
            for x in error.context
        )

    description = error.schema.get("description")
    if description:
        parts.extend(["", "Documentation for this node:", _utils.indent(description)])

    return "\n".join(parts)


def format_error(filepath: Union[str, Path], header: str, content: str) -> str:
    """
    Format a jsonschema validation error.

    :param filepath: The file the error applies to. ``Path`` objects are
        resolved to absolute paths; anything else (e.g. literal content used
        in tests) is displayed as ``<string>``.
    :param header: Optional header line prepended to the error content.
    :param content: The formatted error message body.
    """
    if isinstance(filepath, Path):
        filepath = filepath.resolve()
    else:
        filepath = "<string>"
    if header:
        return f"{filepath}: {header}\n{_utils.indent(content)}"
    else:
        return f"{filepath}:\n{_utils.indent(content)}"


def parse_expires(expires: str) -> datetime.date:
    """
    Parses the expired field date (yyyy-mm-dd) as a date.
    Raises a ValueError in case the string is not properly formatted.
    """
    try:
        if sys.version_info < (3, 7):
            # `date.fromisoformat` is 3.7+; use iso8601 on older interpreters.
            try:
                return iso8601.parse_date(expires).date()
            except iso8601.ParseError:
                # Normalize to ValueError so both branches raise the same type.
                raise ValueError()
        else:
            return datetime.date.fromisoformat(expires)
    except ValueError:
        raise ValueError(
            f"Invalid expiration date '{expires}'. "
            "Must be of the form yyyy-mm-dd in UTC."
        )


def is_expired(expires: str) -> bool:
    """
    Parses the `expires` field in a metric or ping and returns whether
    the object should be considered expired.
    """
    if expires == "never":
        return False
    elif expires == "expired":
        return True
    else:
        date = parse_expires(expires)
        # Compared against today's UTC date, so expiry flips at midnight UTC.
        return date <= datetime.datetime.utcnow().date()


def validate_expires(expires: str) -> None:
    """
    Raises a ValueError in case the `expires` is not ISO8601 parseable,
    or in case the date is more than 730 days (~2 years) in the future.
    """
    if expires in ("never", "expired"):
        return

    date = parse_expires(expires)
    max_date = datetime.datetime.now() + datetime.timedelta(days=730)
    if date > max_date.date():
        raise ValueError(
            f"'{expires}' is more than 730 days (~2 years) in the future.",
            "Please make sure this is intentional.",
            "You can suppress this warning by adding EXPIRATION_DATE_TOO_FAR to no_lint",
            "See: https://mozilla.github.io/glean_parser/metrics-yaml.html#no_lint",
        )


def report_validation_errors(all_objects):
    """
    Report any validation errors found to the console.

    :param all_objects: An iterable of error messages.
    :return: True if at least one error was reported.
    """
    found_error = False
    for error in all_objects:
        found_error = True
        print("=" * 78, file=sys.stderr)
        print(error, file=sys.stderr)
    return found_error


def remove_output_params(d, output_params):
    """
    Remove output-only params, such as "defined_in",
    in order to validate the output against the input schema.

    :param d: The mapping to filter.
    :param output_params: The name of the output-only key to drop.
    """
    # The original used `key is not output_params` -- an identity check that
    # only excluded the key by accident of string interning. Compare by value.
    return {key: value for key, value in d.items() if key != output_params}


# Names of parameters to pass to all metrics constructors.
common_metric_args = [
    "category",
    "name",
    "send_in_pings",
    "lifetime",
    "disabled",
]


# Names of parameters that only apply to some of the metrics types.
# **CAUTION**: This list needs to be in the order the Swift & Rust type constructors
# expects them. (The other language bindings don't care about the order).
extra_metric_args = [
    "time_unit",
    "memory_unit",
    "allowed_extra_keys",
    "reason_codes",
    "range_min",
    "range_max",
    "bucket_count",
    "histogram_type",
    "numerators",
]


# This includes only things that the language bindings care about, not things
# that are metadata-only or are resolved into other parameters at parse time.
# **CAUTION**: This list needs to be in the order the Swift & Rust type constructors
# expects them. (The other language bindings don't care about the order). The
# `test_order_of_fields` test checks that the generated code is valid.
# **DO NOT CHANGE THE ORDER OR ADD NEW FIELDS IN THE MIDDLE**
metric_args = common_metric_args + extra_metric_args


# Names of ping parameters to pass to constructors.
ping_args = [
    "include_client_id",
    "send_if_empty",
    "name",
    "reason_codes",
]


# Names of parameters to pass to both metric and ping constructors (no duplicates).
# Metric args first, then any ping-only args, keeping declaration order.
extra_args = list(metric_args)
extra_args.extend(v for v in ping_args if v not in metric_args)