1# This Source Code Form is subject to the terms of the Mozilla Public
2# License, v. 2.0. If a copy of the MPL was not distributed with this
3# file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5from __future__ import absolute_import, division
6
7import argparse
8import collections
9import csv
10import os
11import six
12import sys
13from calendar import day_name
14from datetime import datetime
15
16import compare
17import numpy
18
# Make the parent of sys.path[0] (normally the directory of the invoked
# script) importable, placing it right after that first entry.
# NOTE(review): this runs after the `import compare` statement above, so it
# cannot influence how that module is located -- confirm the intended order.
sys.path[1:1] = [os.path.join(sys.path[0], "..")]
20
21
def get_branch(platform):
    """Return the "Inbound" branch id to query for *platform*.

    Platforms whose name starts with "OSX" use the "pgo" entry of
    ``compare.branch_map["Inbound"]``; every other platform uses "nonpgo".
    """
    build_kind = "pgo" if platform.startswith("OSX") else "nonpgo"
    return compare.branch_map["Inbound"][build_kind]["id"]
26
27
def get_all_test_tuples():
    """Return the tuples from get_tuple() for every test/platform pair.

    Tests are taken from ``compare.test_map`` (outer loop) and platforms from
    ``compare.platform_map`` (inner loop); the per-pair lists are flattened
    into one list.
    """
    return [
        entry
        for test in compare.test_map
        for platform in compare.platform_map
        for entry in get_tuple(test, platform)
    ]
34
35
def get_tuple(test, platform):
    """Return a one-element list describing *test* on *platform*.

    The single tuple is ``(test id, branch id, platform id, test, platform)``:
    the ids are looked up in ``compare.test_map`` / ``compare.platform_map``
    and via get_branch(); the last two items are the names as given.
    """
    test_id = compare.test_map[test]["id"]
    branch_id = get_branch(platform)
    platform_id = compare.platform_map[platform]
    return [(test_id, branch_id, platform_id, test, platform)]
46
47
def _trim_extremes(ordered):
    """Return *ordered* (already sorted) without roughly its outer 10% tails."""
    size = len(ordered)
    return ordered[int(0.1 * size) : int(0.9 * size + 1)]


def _open_csv_for_writing(filepath):
    """Open *filepath* in the mode the csv module needs on this interpreter."""
    if sys.version_info[0] == 2:
        # Python 2's csv module writes bytes.
        return open(filepath, "wb")
    # Python 3's csv module writes text and handles line endings itself.
    return open(filepath, "w", newline="")


def _weekday_averages(test_runs, mode):
    """Return seven per-weekday averages (Monday first) for one test's runs.

    Each run is indexed positionally: item 2 is passed to
    ``datetime.fromtimestamp`` (so dates are in local time) and item 3 is the
    value that is sorted, counted and fed to ``numpy.var``.
    """
    # Sort a copy so the caller's data is left untouched, then drop the
    # extreme values before grouping.
    ordered = _trim_extremes(sorted(test_runs, key=lambda run: run[3]))

    values_by_date = collections.OrderedDict()
    for run in ordered:
        run_date = datetime.fromtimestamp(run[2]).date()
        values_by_date.setdefault(run_date, []).append(run[3])

    # One sample per calendar date, bucketed by weekday index (Monday == 0,
    # the same order as calendar.day_name).
    samples = [[] for _ in range(7)]
    for run_date, values in values_by_date.items():
        if mode == "variance":
            sample = numpy.var(values)
        else:
            sample = len(values)
        samples[run_date.weekday()].append(sample)

    averages = []
    for day_samples in samples:
        if mode == "variance":
            # Same trimming rule as for the raw runs, to reduce outlier
            # influence on the per-weekday variance.
            day_samples = _trim_extremes(sorted(day_samples))
        if day_samples:
            averages.append(numpy.average(day_samples))
        else:
            # No data for this weekday: report NaN instead of failing.
            averages.append(float("nan"))
    return averages


def generate_report(tuple_list, filepath, mode="variance"):
    """Write a CSV report of per-weekday averages for each test/platform.

    Args:
        tuple_list: iterable of tuples whose first three items are the test,
            branch and platform ids passed to ``compare.getGraphData`` and
            whose remaining items are joined with "-" to label the row.
        filepath: path of the CSV file to (over)write.
        mode: "variance" averages the per-date variance of the run values;
            "count" averages the number of runs per date.

    The file gets a header row followed by one row per tuple for which
    ``compare.getGraphData`` returned data. Values are formatted with three
    decimals; those whose index is returned by is_normal() are wrapped in
    ``**``. Weekdays without any data are written as "nan".

    Raises:
        ValueError: if *mode* is neither "variance" nor "count".
    """
    if mode not in ("variance", "count"):
        raise ValueError(
            "unknown mode %r (expected 'variance' or 'count')" % (mode,)
        )

    rows = []
    for test in tuple_list:
        testid, branchid, platformid = test[:3]
        data_dict = compare.getGraphData(testid, branchid, platformid)
        if not data_dict:
            continue

        week_avgs = _weekday_averages(data_dict["test_runs"], mode)
        row = ["-".join(test[3:])]
        row.extend("%.3f" % average for average in week_avgs)
        # Column 0 is the label, so weekday index i lives in column i + 1.
        for index in set(is_normal(week_avgs)):
            row[index + 1] = "**" + row[index + 1] + "**"
        rows.append(row)

    with _open_csv_for_writing(filepath) as report:
        writer = csv.writer(report, quoting=csv.QUOTE_ALL)
        writer.writerow(["test-platform"] + list(day_name))
        writer.writerows(rows)
101
102
def is_normal(y):
    """Return the indices of entries in *y* that look like outliers.

    *y* must hold at least seven numbers; generate_report() passes the seven
    weekday averages in calendar.day_name order (Monday first). Indices 1-4
    are compared with their neighbours, indices 5 and 6 with the average of
    the values kept as the week's baseline.

    This is a crude initial attempt at detecting normal distributions.
    TODO: Improve this
    """
    limit = 1.5
    clean_week = []
    outliers = []

    # Find a baseline for the week.
    # NOTE(review): y[0:4] only covers indices 0-3 and index 0 is never
    # classified below -- confirm whether y[0:5] / range(0, 5) was intended.
    if (min(y[0:4]) * limit) <= max(y[0:4]):
        for i in range(1, 5):
            if y[i] > (y[i - 1] * limit) or y[i] > (y[i + 1] * limit):
                outliers.append(i)
            else:
                clean_week.append(y[i])
    else:
        clean_week = y

    if not clean_week:
        # Every candidate was flagged, so there is no clean baseline; use the
        # whole week (as the branch above does) instead of dividing by zero.
        clean_week = y

    # Look at weekends now. True division is intended here; the file enables
    # it on Python 2 via `from __future__ import division`.
    avg = sum(clean_week) / len(clean_week)
    for i in range(5, 7):
        # look for something outside of the 20% window
        if (y[i] * 1.2) < avg or y[i] > (avg * 1.2):
            outliers.append(i)
    return outliers
127
128
def main():
    """Parse the command line and write the weekday report CSV.

    The output file is named ``report[-PLATFORM][-TEST]-MODE.csv`` in the
    current directory. ``--platform`` and ``--test`` restrict the report to
    the tuples whose platform / test name matches exactly.
    """
    parser = argparse.ArgumentParser(description="Generate weekdays reports")
    parser.add_argument("--test", help="show only the test named TEST")
    parser.add_argument("--platform", help="show only the platform named PLATFORM")
    parser.add_argument(
        "--mode",
        help="select mode",
        # generate_report() only implements these two modes; reject anything
        # else here with a usage error instead of failing mid-report.
        choices=("variance", "count"),
        default="variance",
    )
    args = parser.parse_args()

    tuple_list = get_all_test_tuples()
    name_parts = ["report"]

    if args.platform:
        # Item 4 of each tuple is the platform name (see get_tuple()).
        tuple_list = [x for x in tuple_list if x[4] == args.platform]
        name_parts.append(args.platform)

    if args.test:
        # Item 3 of each tuple is the test name (see get_tuple()).
        tuple_list = [x for x in tuple_list if x[3] == args.test]
        name_parts.append(args.test)

    name_parts.append(args.mode)
    generate_report(
        tuple_list, filepath="-".join(name_parts) + ".csv", mode=args.mode
    )


if __name__ == "__main__":
    main()
151