# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.


import json
import os
from datetime import datetime, timedelta

import requests
import six

from .estimates import (
    TASK_DURATION_CACHE,
    GRAPH_QUANTILE_CACHE,
    TASK_DURATION_TAG_FILE,
)

TASK_DURATION_URL = (
    "https://storage.googleapis.com/mozilla-mach-data/task_duration_history.json"
)
GRAPH_QUANTILES_URL = (
    "https://storage.googleapis.com/mozilla-mach-data/machtry_quantiles.csv"
)


def check_downloaded_history(tag_file, duration_cache, quantile_cache):
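    """Check whether previously downloaded history is usable.

    Returns True only if all three cache files exist, the duration cache
    is in the current format, and the tag file records a download made
    less than a week ago."""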
    if not os.path.isfile(tag_file):
        return False

    try:
        with open(tag_file) as f:
            duration_tags = json.load(f)
        download_date = datetime.strptime(
            duration_tags.get("download_date"), "%Y-%m-%d"
        )
        if download_date < datetime.now() - timedelta(days=7):
            return False
    except (OSError, ValueError, TypeError):
        return False

    if not os.path.isfile(duration_cache):
        return False
    # Reject the old format version of the file (a JSON list rather than
    # a name -> duration mapping), and treat an unreadable or corrupt
    # cache as missing.
    try:
        with open(duration_cache) as f:
            if isinstance(json.load(f), list):
                return False
    except (OSError, ValueError):
        return False

    if not os.path.isfile(quantile_cache):
        return False

    return True


def download_task_history_data(cache_dir):
    """Fetch task duration data exported from BigQuery."""
    task_duration_cache = os.path.join(cache_dir, TASK_DURATION_CACHE)
    task_duration_tag_file = os.path.join(cache_dir, TASK_DURATION_TAG_FILE)
    graph_quantile_cache = os.path.join(cache_dir, GRAPH_QUANTILE_CACHE)

    if check_downloaded_history(
        task_duration_tag_file, task_duration_cache, graph_quantile_cache
    ):
        return

    # Unlink each stale cache file individually so a missing file
    # doesn't prevent the rest from being cleaned up.
    removed_any = False
    for stale in (task_duration_tag_file, task_duration_cache, graph_quantile_cache):
        try:
            os.unlink(stale)
            removed_any = True
        except OSError:
            pass
    if not removed_any:
        print("No existing task history to clean up.")

    try:
        r = requests.get(TASK_DURATION_URL, stream=True)
        r.raise_for_status()
    except requests.exceptions.RequestException as exc:
        # This is fine; the durations just won't be in the preview window.
        print(
            "Error fetching task duration cache from {}: {}".format(
                TASK_DURATION_URL, exc
            )
        )
        return

    # The data retrieved from Google storage is newline-delimited JSON:
    # one JSON object per line, so it has to be parsed line by line
    # rather than with a single json.loads call.
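    # Each line resembles the following (the task name and duration
    # value here are illustrative, not real data):
    #   {"name": "test-linux1804-64/opt-xpcshell-1", "mean_duration_seconds": 512.4}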
    duration_data = [json.loads(line) for line in r.text.splitlines()]

    # Reformat the duration data from a list of dicts to a single
    # name -> duration mapping, which is much faster to load in the
    # preview window.
    duration_data = {d["name"]: d["mean_duration_seconds"] for d in duration_data}

    with open(task_duration_cache, "w") as f:
        json.dump(duration_data, f, indent=4)

    try:
        r = requests.get(GRAPH_QUANTILES_URL, stream=True)
        r.raise_for_status()
    except requests.exceptions.RequestException as exc:
        # This is fine; the percentiles just won't be in the preview window.
        print(
            "Error fetching task group percentiles from {}: {}".format(
                GRAPH_QUANTILES_URL, exc
            )
        )
        return

    with open(graph_quantile_cache, "w") as f:
        f.write(six.ensure_text(r.content))

    with open(task_duration_tag_file, "w") as f:
        json.dump({"download_date": datetime.now().strftime("%Y-%m-%d")}, f, indent=4)


def make_trimmed_taskgraph_cache(graph_cache, dep_cache, target_file=None):
    """Trim the taskgraph cache used for dependencies.

    Cutting the graph down to the target tasks keeps fzf preview-window
    rendering below human-perceptible delays."""
    if not os.path.isfile(graph_cache):
        return

    target_task_set = set()
    if target_file and os.path.isfile(target_file):
        with open(target_file) as f:
            target_task_set = set(json.load(f).keys())

    with open(graph_cache) as f:
        graph = json.load(f)
    graph = {
        name: list(defn["dependencies"].values())
        for name, defn in graph.items()
        if name in target_task_set
    }
    with open(dep_cache, "w") as f:
        json.dump(graph, f, indent=4)
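# A minimal usage sketch (all paths illustrative): download the history
# data into a cache directory, then trim the task graph for fast
# dependency lookups in the fzf preview window.
#
#   cache_dir = "/path/to/cache"
#   download_task_history_data(cache_dir)
#   make_trimmed_taskgraph_cache(
#       os.path.join(cache_dir, "target_task_graph.json"),
#       os.path.join(cache_dir, "dep_cache.json"),
#       target_file=os.path.join(cache_dir, "target_tasks.json"),
#   )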