# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.


import json
import os
from datetime import datetime, timedelta

import requests

from .estimates import (
    GRAPH_QUANTILE_CACHE,
    TASK_DURATION_CACHE,
    TASK_DURATION_TAG_FILE,
)

TASK_DURATION_URL = (
    "https://storage.googleapis.com/mozilla-mach-data/task_duration_history.json"
)
GRAPH_QUANTILES_URL = (
    "https://storage.googleapis.com/mozilla-mach-data/machtry_quantiles.csv"
)


def check_downloaded_history(tag_file, duration_cache, quantile_cache):
    """Return True if the cached history files exist, are in the current
    format, and were downloaded less than a week ago."""
    if not os.path.isfile(tag_file):
        return False

    try:
        with open(tag_file) as f:
            duration_tags = json.load(f)
        download_date = datetime.strptime(
            duration_tags.get("download_date"), "%Y-%m-%d"
        )
        if download_date < datetime.now() - timedelta(days=7):
            return False
    except (OSError, ValueError, TypeError):
        return False

    if not os.path.isfile(duration_cache):
        return False
    # Reject the old format version of the file, which was a list of dicts
    # rather than a single {name: duration} dict.
    with open(duration_cache) as f:
        data = json.load(f)
        if isinstance(data, list):
            return False
    if not os.path.isfile(quantile_cache):
        return False

    return True


def download_task_history_data(cache_dir):
    """Fetch task duration data exported from BigQuery."""
    task_duration_cache = os.path.join(cache_dir, TASK_DURATION_CACHE)
    task_duration_tag_file = os.path.join(cache_dir, TASK_DURATION_TAG_FILE)
    graph_quantile_cache = os.path.join(cache_dir, GRAPH_QUANTILE_CACHE)

    if check_downloaded_history(
        task_duration_tag_file, task_duration_cache, graph_quantile_cache
    ):
        return

    # Clean up any stale history files individually; ones that don't exist
    # are fine.
    for stale_file in (
        task_duration_tag_file,
        task_duration_cache,
        graph_quantile_cache,
    ):
        try:
            os.unlink(stale_file)
        except OSError:
            pass

    try:
        r = requests.get(TASK_DURATION_URL, stream=True)
        r.raise_for_status()
    except requests.exceptions.RequestException as exc:
        # This is fine; the durations just won't be in the preview window.
        print(
            "Error fetching task duration cache from {}: {}".format(
                TASK_DURATION_URL, exc
            )
        )
        return

    # The data retrieved from Google Storage is newline-delimited JSON (one
    # object per line), which json.load() can't parse in a single call.
    duration_data = [json.loads(line) for line in r.text.splitlines()]

    # Reformat to a flat {name: duration} dict; scanning a list of dicts is
    # slow in the preview window.
    duration_data = {d["name"]: d["mean_duration_seconds"] for d in duration_data}

    with open(task_duration_cache, "w") as f:
        json.dump(duration_data, f, indent=4)

    try:
        r = requests.get(GRAPH_QUANTILES_URL, stream=True)
        r.raise_for_status()
    except requests.exceptions.RequestException as exc:
        # This is fine; the percentile just won't be in the preview window.
        print(
            "Error fetching task group percentiles from {}: {}".format(
                GRAPH_QUANTILES_URL, exc
            )
        )
        return

    with open(graph_quantile_cache, "w") as f:
        f.write(r.text)

    with open(task_duration_tag_file, "w") as f:
        json.dump({"download_date": datetime.now().strftime("%Y-%m-%d")}, f, indent=4)
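

# For reference, a sketch of the files download_task_history_data() leaves in
# cache_dir. The entries shown are illustrative placeholders, not real data;
# the quantile CSV's exact columns are whatever the exported file contains:
#
#   TASK_DURATION_CACHE     {"test-linux1804-64/opt-xpcshell-1": 52.3, ...}
#   GRAPH_QUANTILE_CACHE    the CSV fetched from GRAPH_QUANTILES_URL, as-is
#   TASK_DURATION_TAG_FILE  {"download_date": "2023-01-01"}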


def make_trimmed_taskgraph_cache(graph_cache, dep_cache, target_file=None):
    """Trim the taskgraph cache used for dependencies.

    Trimming keeps fzf preview window load times in barely
    human-perceptible ranges."""
    if not os.path.isfile(graph_cache):
        return

    target_task_set = set()
    if target_file and os.path.isfile(target_file):
        with open(target_file) as f:
            target_task_set = set(json.load(f).keys())

    with open(graph_cache) as f:
        graph = json.load(f)
        # Keep only the target tasks, mapping each one to the labels of its
        # dependencies; the rest of each task definition isn't needed here.
        graph = {
            name: list(defn["dependencies"].values())
            for name, defn in graph.items()
            if name in target_task_set
        }
    with open(dep_cache, "w") as f:
        json.dump(graph, f, indent=4)
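

if __name__ == "__main__":
    # A minimal usage sketch, assuming a mach-style cache directory; the
    # directory and file names below are illustrative assumptions, not part
    # of this module's API. Because of the relative import above, run this
    # as a module (python -m <package>.task_duration), not as a script.
    cache_dir = os.path.expanduser("~/.mozbuild/cache")  # hypothetical location
    os.makedirs(cache_dir, exist_ok=True)
    download_task_history_data(cache_dir)
    make_trimmed_taskgraph_cache(
        os.path.join(cache_dir, "target_task_graph.json"),  # hypothetical input
        os.path.join(cache_dir, "dep_cache.json"),  # hypothetical output
        target_file=os.path.join(cache_dir, "target_tasks.json"),  # hypothetical
    )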