1#!/usr/bin/env python3
2###############################################################################
3# Copyright (c) Intel Corporation - All rights reserved.                      #
4# This file is part of the LIBXSMM library.                                   #
5#                                                                             #
6# For information on the license, see the LICENSE file.                       #
7# Further information: https://github.com/hfp/libxsmm/                        #
8# SPDX-License-Identifier: BSD-3-Clause                                       #
9###############################################################################
10# Hans Pabst (Intel Corp.)
11###############################################################################
12#
13# This script is based on OpenTuner's tutorial:
14# "Optimizing Block Matrix Multiplication".
15#
16import opentuner
17from opentuner import ConfigurationManipulator
18from opentuner import IntegerParameter
19from opentuner import MeasurementInterface
20from opentuner import Result
21import json
22import time
23import sys
24import re
25
26
class TransposeTune(MeasurementInterface):
    """
    OpenTuner measurement interface which searches for the tile sizes
    (M, N) handed to ./transpose.sh by means of the environment variables
    LIBXSMM_TCOPY_M and LIBXSMM_TCOPY_N.
    """
    # Tuning constants are class attributes, so every method can rely on
    # them regardless of the order in which OpenTuner calls the methods.
    # Smallest tile extent that is searched, seeded, or benchmarked.
    mintilesize = 2
    # The parameters "M" and "N" count multiples of this step size.
    granularity = 1

    def manipulator(self):
        """
        Define the search space by creating a
        ConfigurationManipulator

        Returns a ConfigurationManipulator holding the two integer
        parameters "M" and "N". Both are expressed in multiples of
        self.granularity; the upper bounds are derived from the
        arguments maxm/maxn, and are capped by the argument "end".
        """
        granularity = self.granularity
        assert 0 < granularity
        # Integer (floor) division keeps all bounds integral; true division
        # would yield floating-point bounds for the IntegerParameters.
        minsize = max(self.mintilesize // granularity, 1)
        # The upper bound is at least this large (non-empty search range).
        maxsize = minsize + granularity
        manipulator = ConfigurationManipulator()
        for name, limit in (("M", self.args.maxm), ("N", self.args.maxn)):
            upper = max(min(limit, self.args.end), maxsize)
            # Round up to a whole number of granularity steps.
            upper = (upper + granularity - 1) // granularity
            manipulator.add_parameter(IntegerParameter(name, minsize, upper))
        return manipulator

    def seed_configurations(self):
        """
        Provide the initial configuration built from the arguments m and n.

        An extent given as zero falls back to the other extent. An empty
        list is returned if both extents are zero; otherwise a single
        configuration is returned with both values raised to at least
        self.mintilesize.
        """
        m_seed = self.args.m if 0 != self.args.m else self.args.n
        n_seed = self.args.n if 0 != self.args.n else self.args.m
        if 0 == m_seed or 0 == n_seed:
            return []
        return [{"M": max(m_seed, self.mintilesize),
                 "N": max(n_seed, self.mintilesize)}]

    def objective(self):
        """
        Select the objective of the search: maximize the accuracy and
        minimize the size of a Result (see run for both quantities).
        """
        return opentuner.search.objective.MaximizeAccuracyMinimizeSize()

    def run(self, desired_result, input, limit):
        """
        Compile and run a given configuration then
        return performance

        desired_result carries the configuration to be measured; the
        arguments input and limit are not used. Returns a Result with
        time (measured duration divided by nruns), accuracy
        (1000 / time), and size (product of both tile extents).

        Raises RuntimeError if the benchmark fails, or if no positive
        duration can be extracted from its output.
        """
        cfg = desired_result.configuration.data
        nruns = max(self.args.nruns, 1)
        begin = max(self.args.begin, self.mintilesize)
        end = max(self.args.end, self.mintilesize)
        run_cmd = (
            "CHECK=-1"  # repeatable runs
            " LIBXSMM_TCOPY_M=" + str(self.granularity * cfg["M"]) +
            " LIBXSMM_TCOPY_N=" + str(self.granularity * cfg["N"]) +
            " ./transpose.sh o" + " " + str(end) + " " + str(end) +
            " " + str(end) + " " + str(end) + " " + str(nruns) +
            " -" + str(begin))
        run_result = self.call_program(run_cmd)
        if 0 != run_result["returncode"]:
            # Suppress the traceback (the message names the failed command).
            sys.tracebacklimit = 0
            raise RuntimeError("Execution failed for \"" + run_cmd + "\"!")
        stdout = run_result["stdout"]
        if isinstance(stdout, bytes):
            # Decode captured output rather than matching against the
            # textual representation of a bytes object.
            stdout = stdout.decode("utf-8", "replace")
        match = re.search(
            r"\s*duration:\s+([0-9]+(\.[0-9]*)*)",
            str(stdout))
        # Explicit checks (not asserts): the output of an external
        # program must be validated even when running with "-O".
        if match is None:
            raise RuntimeError(
                "No duration found in output of \"" + run_cmd + "\"!")
        # Presumably milliseconds (given the factor 1000 below);
        # verify against the output of transpose.sh.
        mseconds = float(match.group(1)) / nruns
        if not 0 < mseconds:
            raise RuntimeError(
                "Non-positive duration reported by \"" + run_cmd + "\"!")
        frequency = 1000.0 / mseconds
        kernelsize = (self.granularity**2) * cfg["M"] * cfg["N"]
        return Result(time=mseconds, accuracy=frequency, size=kernelsize)

    def save_final_config(self, configuration):
        """
        called at the end of tuning

        Prints the final configuration, and writes it as JSON into a
        file in the current working directory. The filename consists
        of the arguments begin, end, and nruns (each at least 1), and
        the current local time.
        """
        filename = (
            "transpose-" + str(max(self.args.begin, 1)) +
            "_" + str(max(self.args.end,   1)) +
            "_" + str(max(self.args.nruns, 1)) +
            time.strftime("-%Y%m%d-%H%M%S") + ".json")
        print("Optimal block size written to " + filename +
              ": ", configuration.data)
        # self.manipulator().save_to_file(configuration.data, filename)
        with open(filename, 'w') as fd:
            json.dump(configuration.data, fd)
106
107
if __name__ == "__main__":
    # Positional command-line arguments in the order they are expected:
    # (name, default, help). A default of None marks a mandatory argument;
    # any other default makes the argument optional (nargs='?').
    positionals = (
        ("begin", None, "Begin of the range (min. M and N)"),
        ("end", None, "End of the range (max. M and N)"),
        ("nruns", 100, "Number of experiments per epoch"),
        ("m", 0, "Initial tile size (M)"),
        ("n", 0, "Initial tile size (N)"),
        ("maxm", 160, "Max. tile size (M)"),
        ("maxn", 160, "Max. tile size (N)"),
    )
    # Start from OpenTuner's default parser, and append the arguments above.
    parser = opentuner.default_argparser()
    for name, default, helptext in positionals:
        if default is None:
            parser.add_argument(name, type=int, help=helptext)
        else:
            parser.add_argument(
                name, type=int, default=default, nargs='?', help=helptext)
    TransposeTune.main(parser.parse_args())
132