1"""Reads JSON files produced by the benchmarking framework and renders them.
2
3Installation:
4> apt-get install python3-pip
5> pip3 install matplotlib scipy numpy
6
7Run:
8> python3 render.py3 <files>
9
10Rendering can occur on disk by specifying the --output option or on screen if
11the --headless flag is not set.
12"""
13
14import argparse
15import collections
16import json
17import math
18import pprint
19import sys
20import matplotlib.pyplot as plt
21from matplotlib.ticker import EngFormatter
22import numpy as np
23import scipy.stats
24
25
def format_freq(number):
    """Returns a human readable frequency.

    The value is repeatedly divided by 1000 and suffixed with the matching
    prefix ("", "k", "M" or "G") followed by "Hz".

    Args:
        number: The frequency to format.

    Returns:
        A string such as "2.5GHz". Values too large for the "G" prefix are
        still expressed with it (e.g. "1000GHz") since it is the largest
        prefix available.
    """
    prefixes = ["", "k", "M", "G"]
    magnitude = 0
    # Stop at the last prefix so very large values cannot index past the end
    # of the prefix table.
    while math.fabs(number) >= 1000 and magnitude < len(prefixes) - 1:
        number /= 1000.0
        magnitude += 1
    return "%g%sHz" % (number, prefixes[magnitude])
33
34
def format_size(number):
    """Returns number in human readable form.

    The number is divided by 1000 only while it is an exact multiple of 1000,
    so no precision is lost: 64000 becomes "64K" but 1500 stays "1500".

    Args:
        number: The non-negative size to format.

    Returns:
        The scaled number followed by "", "K", "M" or "G". Exact multiples too
        large for the "G" suffix are still expressed with it (e.g. "1000G")
        since it is the largest suffix available.
    """
    suffixes = ["", "K", "M", "G"]
    magnitude = 0
    # Stop at the last suffix so very large values cannot index past the end
    # of the suffix table.
    while (number >= 1000 and number % 1000 == 0
           and magnitude < len(suffixes) - 1):
        number /= 1000
        magnitude += 1
    return "%g%s" % (number, suffixes[magnitude])
42
43
def mean_confidence_interval(dataset, confidence=0.95):
    """Returns the mean and half confidence interval for the dataset.

    The half interval is the standard error of the mean scaled by the
    two-sided Student's t quantile with n - 1 degrees of freedom.

    Args:
        dataset: A sequence of numeric samples.
        confidence: The confidence level of the interval (0.95 by default).

    Returns:
        A (mean, half_width) tuple; the interval is
        [mean - half_width, mean + half_width]. half_width is 0.0 when the
        dataset has fewer than two samples, since no spread can be estimated.
    """
    samples = np.asarray(dataset, dtype=float)
    count = len(samples)
    mean = np.mean(samples)
    # With fewer than two samples the standard error and the t quantile are
    # both undefined (NaN); report a zero-width interval instead.
    if count < 2:
        return mean, 0.0
    std_err = scipy.stats.sem(samples)
    half_width = std_err * scipy.stats.t.ppf((1 + confidence) / 2., count - 1)
    return mean, half_width
51
52
def add_plot(function_name, points):
    """Draws one curve, with its confidence band, on the current plot.

    Args:
        function_name: Label used for the curve in the legend.
        points: Mapping from an x value to the list of measurements taken for
            that x value.
    """
    ordered_keys = sorted(points)
    # One (mean, half interval) pair per x value, in increasing x order.
    stats = [mean_confidence_interval(points[key]) for key in ordered_keys]

    x = np.array(ordered_keys, dtype=float)
    y = np.array([mean for mean, _ in stats], dtype=float)
    yerr = np.array([half for _, half in stats], dtype=float)

    plt.plot(x, y, linewidth=1, label=function_name)
    # Shade the area between the lower and upper bounds of the interval.
    plt.fill_between(x, y - yerr, y + yerr, alpha=0.5)
69
70
def get_title(host):
    """Builds the two-line plot title from a Host object.

    The first line holds the CPU name and its formatted frequency. The second
    line lists one entry per cache (type prefix, level, formatted size and
    sharing count), sorted and wrapped in `$` signs.

    Args:
        host: Dictionary with the "CpuName", "CpuFrequency" and "Caches" keys.

    Returns:
        The title and subtitle joined by a newline.
    """
    type_prefixes = {
        "Instruction": "i",
        "Data": "d",
        "Unified": "u",
    }
    headline = "%s (%s)" % (host["CpuName"],
                            format_freq(host["CpuFrequency"]))
    cache_labels = sorted(
        r"%sL_%d %s_{/%d}" % (
            type_prefixes.get(cache["Type"]),
            cache["Level"],
            format_size(cache["Size"]),
            cache["NumSharing"],
        )
        for cache in host["Caches"])
    return headline + "\n" + r"$" + ", ".join(cache_labels) + r"$"
88
89
def get_host(jsons):
    """Returns the host of the different json objects iff they are all the same.

    Args:
        jsons: Iterable of parsed benchmark files, each holding a "Host" key.

    Returns:
        The shared Host object, or None if jsons is empty.

    Exits the program with an error message if two files disagree on their
    Host.
    """
    host = None
    for index, root in enumerate(jsons):
        if index == 0:
            # The first file is the reference. Tracking the position rather
            # than the truthiness of `host` keeps an empty Host comparable.
            host = root["Host"]
        elif root["Host"] != host:
            sys.exit("The datasets are not coming from the same Host")
    return host
100
101
def get_configuration(jsons):
    """Returns the configuration of the different json objects iff they are all
    the same.

    Args:
        jsons: Iterable of parsed benchmark files, each holding a
            "Configuration" key.

    Returns:
        The shared Configuration object, or None if jsons is empty or if two
        files disagree on their Configuration.
    """
    config = None
    for index, root in enumerate(jsons):
        if index == 0:
            # The first file is the reference. Tracking the position rather
            # than the truthiness of `config` keeps an empty Configuration
            # comparable.
            config = root["Configuration"]
        elif root["Configuration"] != config:
            return None
    return config
113
114
def setup_graphs(files, display):
    """Loads the json files and draws their measurements on the current plot.

    One curve is added per function found in each file. The plot is then
    decorated with the shared configuration (if any), the host title, axis
    labels, a legend and a grid.

    Args:
        files: Paths of the json files to load.
        display: Quantity plotted on the y axis: "time" plots the runtimes as
            is, "cycles" plots runtime * CPU frequency and "bytespercycle"
            plots size / (runtime * CPU frequency).
    """
    jsons = []
    for path in files:
        with open(path) as handle:
            jsons.append(json.load(handle))
    if not jsons:
        sys.exit("Nothing to process")

    for root in jsons:
        frequency = root["Host"]["CpuFrequency"]
        for function in root["Functions"]:
            name = function["Name"]
            sizes = function["Sizes"]
            runtimes = function["Runtimes"]
            assert len(sizes) == len(runtimes)
            # Group the measurements by size; a size may appear several times.
            samples = collections.defaultdict(list)
            for size, runtime in zip(sizes, runtimes):
                if display == "cycles":
                    sample = runtime * frequency
                elif display == "bytespercycle":
                    sample = size / (runtime * frequency)
                else:
                    sample = runtime
                samples[size].append(sample)
            add_plot(name, samples)

    # Only show the configuration box when all the files agree on it.
    config = get_configuration(jsons)
    if config:
        plt.figtext(
            0.95,
            0.15,
            pprint.pformat(config),
            verticalalignment="bottom",
            horizontalalignment="right",
            multialignment="left",
            fontsize="small",
            bbox={"boxstyle": "round", "facecolor": "wheat"})

    axes = plt.gca()
    axes.set_title(get_title(get_host(jsons)))
    axes.set_ylim(bottom=0)
    axes.set_xlabel("Size")
    axes.xaxis.set_major_formatter(EngFormatter(unit="B"))

    y_labels = {
        "cycles": "Cycles",
        "time": "Time",
        "bytespercycle": "bytes/cycle",
    }
    if display in y_labels:
        axes.set_ylabel(y_labels[display])
    if display == "time":
        # Times get an engineering formatter with a unit of seconds.
        axes.yaxis.set_major_formatter(EngFormatter(unit="s"))

    plt.legend()
    plt.grid()
169
170
def main(argv=None):
    """Parses the command line, renders the graphs, then saves/shows them.

    Args:
        argv: Optional list of command line arguments, without the program
            name. When None, argparse reads them from sys.argv.
    """
    parser = argparse.ArgumentParser(
        description="Process benchmark json files.")
    parser.add_argument("files", nargs="+", help="The json files to read from.")
    parser.add_argument("--output", help="The output file to write the graph.")
    parser.add_argument(
        "--headless",
        help="If set do not display the graph.",
        action="store_true")
    parser.add_argument(
        "--display",
        choices=["time", "cycles", "bytespercycle"],
        default="time",
        # The help text spells the values exactly as they must be typed.
        help="Use to display either 'time', 'cycles' or 'bytespercycle'.")

    args = parser.parse_args(argv)
    setup_graphs(args.files, args.display)
    # Saving and showing are independent: both, either or neither may happen.
    if args.output:
        plt.savefig(args.output)
    if not args.headless:
        plt.show()
192
# Only render when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
195