1#!/usr/bin/env python
2
3# Compare CP2K outputs
4# Author: Alfio Lazzaro
5# Email: alfio.lazzaro@mat.ethz.ch
6# Year: 2016
7
8# Example 1: show timings for a CP2K output
9#       > diff_cp2k.py <name_file>
10#    It shows the values of the timings for the MAXIMUM SELF
11#    timings as extracted from the final table of timings of
12#    CP2K output. The values are sorted (only values >0).
13#    You can use the option
14#       -f <1 || 2 || 3 || 4>
15#    to change between AVERAGE SELF (1), MAX SELF (2), AVERAGE TOTAL (3) or MAX TOTAL (4).
#    The last line, CP2K_Total, always refers to the MAXIMUM TOTAL TIME.
17#    There is also the possibility to filter between the SUBROUTINE names
18#    by using the options:
19#       -g <name> : partial comparison (similar to linux command grep)
20#       -e <name> : exact comparison
21#    (regexp are not implemented)
22#
23# Example 2: compare two (or more) CP2K outputs
24#       > diff_cp2k.py <list of files>
25#    You can use wild cards (for example *.out).
26#    It shows the timings from all outputs, sorted by the values
27#    of the first file, which is the reference for the comparison.
28#    It also shows the relative difference (in percentage) with respect to
#    the reference values. Colors/bold are used to easily spot the larger discrepancies:
30#       blue: smaller than reference
31#       blue bold: smaller than reference > 100%
32#       green: bigger than reference
33#       green bold: bigger than reference > 100%
#    A row of dashes "-------" is reported for SUBROUTINES that
35#    are only in the reference file, while the SUBROUTINES
36#    that are only in the other files are reported for each file at the end.
37#    You can use the option
38#       -b <#>
39#    to change the file used as reference (default is 1).
40#    The other options mentioned in Example 1 are still valid here.
41#    It is possible to replace the SUBROUTINE names. This feature allows, for example,
42#    to compare SUBROUTINEs with different names belonging to different files.
43#    Create a file, called diff_cp2k_keys.py, where you declare
44#    the SUBROUTINE names and their replacements, e.g.
45#        special_keys={'cannon_multiply_low_rma_metroc':'cannon_multiply_low_metrocomm1' ,
46#                      'cannon_multiply_low_rma':'cannon_multiply_low'}
47#    In this case the SUBROUTINE with name cannon_multiply_low_rma_metroc will be
48#    replaced by the name cannon_multiply_low_metrocomm1.
49#    The file is automatically loaded from the local directory where
50#    you run the script or from the home directory. Alternatively it is possible
51#    to use the option
52#       -k <file keys>
53#    to specify a different file.
54#
55# Example 3: grep for some other values
56#    As described in Example 2, create a file, called diff_cp2k_keys.py,
57#    where you declare the keywords that you want to grep from the output, e.g.
58#       stats_keys={'flops total':[0],'average stack size':[1,2]}
59#    The script splits the line by the keyword in two parts and reports
60#    the field at the given position of the right part.
61#    The file is automatically loaded from the local directory where
62#    you run the script or from the home directory. Alternatively it is possible
63#    to use the option
64#       -k <file keys>
65#    to specify a different file.
66#    The values will appear under "Stats report".
67#
68
69import sys
70import argparse
71import operator
72import os
73import imp
74
75
def read_file(filename, field, special_keys, stats_keys):
    """Extract timings, statistics and identification data from one output file.

    The file is scanned line by line:

    * a line containing ``NAMEOUT=`` provides the run name;
    * a line containing the ``ENERGY| Total FORCE_EVAL ( QS ) energy (a.u.):``
      marker provides the energy (kept as a string);
    * once a line containing ``DBCSR STATISTICS`` has been seen, every
      following line is searched for the keywords of ``stats_keys``;
    * once a line containing ``T I M I N G`` has been seen, the rows starting
      five lines later are parsed as timing rows until a line containing
      ``-----`` closes the table.

    Args:
        filename: path of the file to read.
        field: zero-based offset of the timing column to extract; the value is
            read from whitespace-separated column ``3 + field`` of each row.
        special_keys: dict mapping a routine name to its replacement name.
        stats_keys: dict mapping a keyword to a list of field indices; the
            line is split at the keyword and the whitespace-separated fields
            of the right-hand part are picked by those indices.

    Returns:
        A tuple ``(dict_values, dict_stats, nameout)`` where ``dict_values``
        maps routine names to float timings (the ``CP2K`` row is stored as
        ``CP2K_Total`` and always taken from column 6), ``dict_stats`` maps
        ``"<keyword> [<index>]"`` to the extracted string, and ``nameout`` is
        the list ``[run name, energy string]`` (empty strings when not found).

    Exits the process with status -1 when the file cannot be opened or read.
    """
    dict_values = {}
    dict_stats = {}
    nameout = ["", ""]
    in_stats = False  # becomes True once "DBCSR STATISTICS" has been seen
    table_line = 0  # lines seen since the "T I M I N G" banner (0 = outside)
    try:
        with open(filename, "r") as f:
            for line in f:
                if "NAMEOUT=" in line:
                    nameout[0] = line.split("=", 2)[1].strip()
                    continue
                if "ENERGY| Total FORCE_EVAL ( QS ) energy (a.u.):" in line:
                    nameout[1] = line.split(":", 2)[1].strip()
                    continue

                # Nothing else is parsed before the statistics section starts.
                if not in_stats and "DBCSR STATISTICS" not in line:
                    continue
                in_stats = True

                # Only the first keyword matching the line is used.
                for stats_key in stats_keys:
                    if stats_key in line:
                        right_fields = line.split(stats_key, 2)[1].split()
                        for index in stats_keys[stats_key]:
                            index_key = stats_key.strip() + " [" + str(index) + "]"
                            dict_stats[index_key] = right_fields[index]
                        break

                if table_line == 0 and "T I M I N G" not in line:
                    continue
                table_line += 1
                # Skip the banner and the table header (first five lines).
                if table_line < 6:
                    continue
                # A dashed line closes the table.
                if "-----" in line:
                    table_line = 0
                    continue

                values = line.split()
                # A timing row has 7 columns (name, calls, ASD, 4 timings);
                # blank or truncated lines cannot be parsed and are skipped.
                if len(values) < 7:
                    continue
                # Drop negligible timings, but always keep the CP2K row.
                if float(values[3 + field]) <= 0.001 and values[0] != "CP2K":
                    continue
                if values[0] in special_keys:
                    values[0] = special_keys[values[0]]
                # take only the first timing of duplicate special_keys
                if values[0] in dict_values:
                    continue
                if values[0] == "CP2K":
                    dict_values[values[0] + "_Total"] = float(values[6])
                else:
                    dict_values[values[0]] = float(values[3 + field])
    except IOError:
        print("Cannot open " + filename)
        print("Exit")
        sys.exit(-1)
    return dict_values, dict_stats, nameout
132
133
def print_value(ref, value):
    """Write ``value`` and its deviation from ``ref`` to stdout, colorized.

    The deviation is the relative difference with respect to ``ref`` in
    percent, or infinity when ``ref`` is not positive. The text is green for
    a positive deviation, blue for a negative one, and bold in addition when
    the absolute deviation exceeds 100. No newline is written.
    """
    reset = "\033[0m"
    percent = (value - ref) / ref * 100 if ref > 0 else float("Inf")

    if percent > 0:
        style = "\033[92m"
    elif percent < 0:
        style = "\033[94m"
    else:
        style = reset
    if abs(percent) > 100:
        style += "\033[1m"

    pieces = (style, "%10.3f" % value, "%5.0f" % percent, reset)
    sys.stdout.write("".join(pieces))
148
149
150#################
151# Main function #
152#################
153
154
def main():
    """Parse the command line, read every file and print the reports.

    Three sections are written to stdout:

    * "Timings report": the timings of the base file (option ``-b``) sorted
      by value, followed for every other file by its value and relative
      difference, or by dashes when the routine is missing in that file;
    * one line per file with its name, its energy and the relative
      difference to the energy of the base file, then for every non-base
      file the routines that were not found in the base file;
    * "Stats report": the values extracted for the keywords of ``stats_keys``.

    The process exits with status -1 when an explicitly given keys file
    cannot be opened or when the ``-b`` value is out of range.
    """
    parser = argparse.ArgumentParser(description="Comparison of CP2K output timings.")
    parser.add_argument("file_lists", nargs="+", help="list of files")
    parser.add_argument(
        "-f",
        metavar="field",
        type=int,
        dest="field",
        # range() exists on both Python 2 and 3 (xrange is Python 2 only)
        choices=range(1, 5),
        default=2,
        help="which field to show (default is 2)",
    )
    parser.add_argument(
        "-b",
        metavar="base",
        type=int,
        dest="base",
        default=1,
        help="which file to use as base for the comparison (default is 1)",
    )
    parser.add_argument(
        "-g",
        metavar="grep",
        nargs="+",
        dest="grep",
        default="",
        help="Fields to grep (check the inclusion correspondance of the words)",
    )
    parser.add_argument(
        "-e",
        metavar="filter",
        nargs="+",
        dest="filter",
        default="",
        help="Fields to grep (check the exact correspondance of the words)",
    )
    parser.add_argument(
        "-k", metavar="file_keys", dest="file_keys", default="", help="File of keys"
    )
    args = parser.parse_args()

    # Empty keys by default
    special_keys = {}
    stats_keys = {}

    # Check for keys file
    explicit_keys = len(args.file_keys) > 0
    if explicit_keys:
        file_keys = [os.path.abspath(args.file_keys)]
    else:
        # if no file_keys is provided, then look for it in the local directory and home
        file_keys = [
            os.getcwd() + "/diff_cp2k_keys.py",
            os.path.expanduser("~") + "/diff_cp2k_keys.py",
        ]

    for filename in file_keys:
        try:
            # NOTE(review): the imp module is deprecated in Python 3 and
            # removed in 3.12 -- migrate to importlib together with the
            # file-level import.
            module = imp.load_source("*", filename)
        except IOError:
            # A missing default keys file is not an error.
            if explicit_keys:
                print("Cannont open file keys " + filename + "!")
                print("Exit")
                sys.exit(-1)
            continue
        # A keys file may define only one of the two dictionaries.
        special_keys = getattr(module, "special_keys", special_keys)
        stats_keys = getattr(module, "stats_keys", stats_keys)

    if args.base < 1 or args.base > len(args.file_lists):
        print(
            "Value for -b option out-of-bounds! Allowed values are between 1 and "
            + str(len(args.file_lists))
        )
        print("Exit")
        sys.exit(-1)

    base_file = args.file_lists[args.base - 1]

    def is_filtered_out(name):
        """Return True when ``name`` is rejected by the -g or -e option."""
        if len(args.grep) > 0 and any(s not in name for s in args.grep):
            return True
        return len(args.filter) > 0 and name not in args.filter

    dict_values = {}
    dict_stats = {}
    files = {}
    for filename in args.file_lists:
        dict_values[filename], dict_stats[filename], files[filename] = read_file(
            filename, args.field - 1, special_keys, stats_keys
        )

    print("===== Timings report =====")

    # sorted by the timings of the base file
    sorted_values = sorted(dict_values[base_file].items(), key=operator.itemgetter(1))
    for name, ref_time in sorted_values:
        # Apply filtering (the total is always shown)
        if name != "CP2K_Total" and is_filtered_out(name):
            continue
        sys.stdout.write(name.ljust(30) + "%10.3f" % ref_time)
        for filename in args.file_lists:
            if filename == base_file:
                continue
            if name not in dict_values[filename]:
                sys.stdout.write(("-" * 10).rjust(15))
                continue
            print_value(ref_time, dict_values[filename][name])
            # Reported entries are removed so that only the routines missing
            # from the base file are left for the "Remaining entries" lists.
            del dict_values[filename][name]
        print("")

    print("")

    ref = 0
    if len(files[base_file][1]) > 0:
        ref = float(files[base_file][1])
    endc = "\033[0m"
    for filename in args.file_lists:
        if len(files[filename][1]) > 0:
            if ref != 0:
                comp = (float(files[filename][1]) - ref) / ref
                # Highlight in red any energy that differs from the reference
                color = "\033[91m" if abs(comp) > 1e-14 else "\033[0m"
            else:
                # No usable reference energy: the relative difference is
                # undefined, so report dashes instead of dividing by zero.
                comp = "-" * 20
                color = "\033[0m"
            print(
                ("{0} ==> {1} : {2} : " + color + "{3}" + endc).format(
                    files[filename][0],
                    filename,
                    files[filename][1],
                    comp,
                )
            )
        else:
            # Single write keeps the dashes on the same line on Python 2 and 3.
            sys.stdout.write(
                ("{0} ==> {1} : ").format(files[filename][0], filename)
                + "-" * 20
                + "\n"
            )

    print("")

    for filename in args.file_lists:
        if filename == base_file:
            continue
        print("Remaining entries in " + files[filename][0] + " ==> " + filename)
        sorted_values = sorted(
            dict_values[filename].items(), key=operator.itemgetter(1)
        )
        count = 0
        for name, timing in sorted_values:
            # Apply filtering
            if is_filtered_out(name):
                continue
            print(name.ljust(30) + "%10.3f" % timing)
            count += 1
        if count == 0:
            print("<None>")
        print("")

    print("===== Stats report =====")

    if len(stats_keys) > 0:
        for stats_key in stats_keys:
            for index in stats_keys[stats_key]:
                index_key = stats_key.strip() + " [" + str(index) + "]"
                sys.stdout.write(index_key.ljust(35))
                for filename in args.file_lists:
                    if index_key not in dict_stats[filename]:
                        sys.stdout.write(("-" * 18).ljust(20))
                        continue
                    sys.stdout.write(dict_stats[filename][index_key].ljust(20))
                print("")
    else:
        print("<None>")

    print("")
326
327
328# ===============================================================================
# Run the command-line interface only when executed as a script, so that
# importing this file does not parse sys.argv or print anything.
if __name__ == "__main__":
    main()
330