1#!/usr/bin/env python3
3# Copyright (c) Intel Corporation - All rights reserved.                      #
4# This file is part of the LIBXSMM library.                                   #
5#                                                                             #
6# For information on the license, see the LICENSE file.                       #
7# Further information: https://github.com/hfp/libxsmm/                        #
8# SPDX-License-Identifier: BSD-3-Clause                                       #
10# Hans Pabst (Intel Corp.)
13# This script is based on OpenTuner's tutorial:
14# "Optimizing Block Matrix Multiplication".
16import opentuner
17from opentuner import ConfigurationManipulator
18from opentuner import IntegerParameter
19from opentuner import MeasurementInterface
20from opentuner import Result
21import json
22import time
23import sys
24import re
class TransposeTune(MeasurementInterface):
    """
    OpenTuner measurement interface searching for the tile size (M, N)
    passed to ./transpose.sh via LIBXSMM_TCOPY_M and LIBXSMM_TCOPY_N.

    The tuned parameters "M" and "N" are expressed in units of
    `granularity`; the tile size handed to the benchmark is
    granularity * parameter.
    """

    # Defined on the class (rather than inside manipulator()) such that
    # every method can rely on them regardless of the call order.
    mintilesize = 2  # smallest tile extent (also lower bound for begin/end)
    granularity = 1  # step between two adjacent tile extents

    def manipulator(self):
        """
        Define the search space by creating a
        ConfigurationManipulator

        Returns a manipulator with the two integer parameters "M" and "N".
        The upper bounds are derived from the command line arguments
        maxm/maxn, each clamped by the argument end.
        """
        assert 0 < self.granularity
        # Floor division keeps all bounds integral (true division would
        # hand float bounds to IntegerParameter under Python 3).
        minsize = max(self.mintilesize // self.granularity, 1)
        maxsize = minsize + self.granularity
        m_max = max(min(self.args.maxm, self.args.end), maxsize)
        n_max = max(min(self.args.maxn, self.args.end), maxsize)
        # Round up to a whole number of granularity steps.
        m_max = (m_max + self.granularity - 1) // self.granularity
        n_max = (n_max + self.granularity - 1) // self.granularity
        manipulator = ConfigurationManipulator()
        manipulator.add_parameter(IntegerParameter("M", minsize, m_max))
        manipulator.add_parameter(IntegerParameter("N", minsize, n_max))
        return manipulator

    def seed_configurations(self):
        """
        Return the initial configuration(s) derived from the arguments
        m and n. A value of zero is substituted by the other argument;
        if both are zero, no seed is returned (empty list).
        """
        m_seed = self.args.m if 0 != self.args.m else self.args.n
        n_seed = self.args.n if 0 != self.args.n else self.args.m
        if 0 == m_seed or 0 == n_seed:
            return []
        # NOTE(review): seeds are used as given (not divided by granularity);
        # equivalent as long as granularity is 1 - confirm if that changes.
        return [{"M": max(m_seed, self.mintilesize),
                 "N": max(n_seed, self.mintilesize)}]

    def objective(self):
        """
        Return the search objective (MaximizeAccuracyMinimizeSize); the
        matching accuracy and size values are produced by run().
        """
        return opentuner.search.objective.MaximizeAccuracyMinimizeSize()

    def run(self, desired_result, input, limit):
        """
        Compile and run a given configuration then
        return performance

        The configuration is taken from desired_result; the arguments
        input and limit are not used. The duration is parsed from the
        output of ./transpose.sh ("duration: <number>") and divided by
        the number of runs. The returned Result carries this value as
        time, 1000 divided by it as accuracy, and the tile area as size.

        Raises RuntimeError if the command fails, or if no positive
        duration can be extracted from its output.
        """
        cfg = desired_result.configuration.data
        nruns = max(self.args.nruns, 1)
        begin = max(self.args.begin, self.mintilesize)
        end = max(self.args.end, self.mintilesize)
        # int() guarantees integral values in the environment variables.
        tile_m = int(self.granularity * cfg["M"])
        tile_n = int(self.granularity * cfg["N"])
        run_cmd = (
            "CHECK=-1"  # repeatable runs
            " LIBXSMM_TCOPY_M=" + str(tile_m) +
            " LIBXSMM_TCOPY_N=" + str(tile_n) +
            " ./transpose.sh o" + " " + str(end) + " " + str(end) +
            " " + str(end) + " " + str(end) + " " + str(nruns) +
            " -" + str(begin))
        run_result = self.call_program(run_cmd)
        if 0 != run_result["returncode"]:
            sys.tracebacklimit = 0
            raise RuntimeError("Execution failed for \"" + run_cmd + "\"!")
        stdout = run_result["stdout"]
        if isinstance(stdout, bytes):
            # Decode rather than str(): the latter yields the b'...' repr
            # in which newlines and tabs appear as escape sequences.
            stdout = stdout.decode("utf-8", "replace")
        match = re.search(
            r"\s*duration:\s+([0-9]+(\.[0-9]*)*)",
            str(stdout))
        # Explicit errors (instead of assert) remain active under "-O".
        if match is None:
            raise RuntimeError(
                "No duration reported by \"" + run_cmd + "\"!")
        # presumably milliseconds per run (see the factor 1000 below)
        mseconds = float(match.group(1)) / nruns
        if not 0 < mseconds:
            raise RuntimeError(
                "Invalid duration reported by \"" + run_cmd + "\"!")
        frequency = 1000.0 / mseconds
        kernelsize = tile_m * tile_n
        return Result(time=mseconds, accuracy=frequency, size=kernelsize)

    def save_final_config(self, configuration):
        """
        called at the end of tuning

        Prints the given configuration and writes its data as JSON into
        the current working directory. The filename consists of the
        arguments begin, end, and nruns (each at least 1), and a timestamp.
        """
        filename = (
            "transpose-" + str(max(self.args.begin, 1)) +
            "_" + str(max(self.args.end, 1)) +
            "_" + str(max(self.args.nruns, 1)) +
            time.strftime("-%Y%m%d-%H%M%S") + ".json")
        print("Optimal block size written to " + filename +
              ": ", configuration.data)
        with open(filename, "w") as fd:
            json.dump(configuration.data, fd)
if __name__ == "__main__":
    # Extend OpenTuner's default parser by the positional arguments of this
    # script: (name, default, help). A default of None marks a mandatory
    # argument; all others may be omitted (nargs='?').
    parser = opentuner.default_argparser()
    positionals = (
        ("begin", None, "Begin of the range (min. M and N)"),
        ("end", None, "End of the range (max. M and N)"),
        ("nruns", 100, "Number of experiments per epoch"),
        ("m", 0, "Initial tile size (M)"),
        ("n", 0, "Initial tile size (N)"),
        ("maxm", 160, "Max. tile size (M)"),
        ("maxn", 160, "Max. tile size (N)"),
    )
    for name, fallback, description in positionals:
        if fallback is None:
            parser.add_argument(name, type=int, help=description)
        else:
            parser.add_argument(
                name, type=int, default=fallback, nargs='?',
                help=description)
    # Hand the parsed arguments over to the tuner.
    TransposeTune.main(parser.parse_args())