1# coding=utf-8
2# Copyright 2016 Advanced Micro Devices, Inc.
3#
4# Permission is hereby granted, free of charge, to any person obtaining a
5# copy of this software and associated documentation files (the "Software"),
6# to deal in the Software without restriction, including without limitation
7# the rights to use, copy, modify, merge, publish, distribute, sublicense,
8# and/or sell copies of the Software, and to permit persons to whom the
9# Software is furnished to do so, subject to the following conditions:
10#
11# The above copyright notice and this permission notice (including the next
12# paragraph) shall be included in all copies or substantial portions of the
13# Software.
14#
15# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21# SOFTWARE.
22
23import os
24import random
25import textwrap
26
27from modules import utils
28
# Scalar element types covered by the plain vstoreN tests; main() passes each
# entry as both the value type and the memory type.
TYPES = [
    'char', 'uchar',
    'short', 'ushort',
    'int', 'uint',
    'long', 'ulong',
    'half', 'float', 'double',
]
# Vector widths, kept as strings because they are appended to type and
# function names when the tests are generated.
VEC_SIZES = ['2', '3', '4', '8', '16']

# Directory (relative to the working directory) the generated files go into.
DIR_NAME = os.path.join("cl", "vstore")
33
34
def gen_array(size):
    """Return ``size`` pseudo-random integers in [0, 255] as one string.

    The values are separated by single spaces.  The generator is seeded with
    ``size`` itself, so the result for a given ``size`` is always the same
    text.  A private ``random.Random`` instance is used so that the
    module-level generator shared with the rest of the process is not
    reseeded as a side effect.

    Args:
        size: number of values to produce; 0 yields an empty string.

    Returns:
        The space-separated decimal values.
    """
    rng = random.Random(size)
    return ' '.join(str(rng.randint(0, 255)) for _ in range(size))
38
39
def ext_req(type_name):
    """Return the config line naming the extension ``type_name`` requires.

    Names beginning with "double" map to cl_khr_fp64 and names beginning
    with "half" map to cl_khr_fp16; any other name yields an empty string.
    """
    extension_by_prefix = (
        ("double", "cl_khr_fp64"),
        ("half", "cl_khr_fp16"),
    )
    for prefix, extension in extension_by_prefix:
        if type_name.startswith(prefix):
            return "require_device_extensions: " + extension
    return ""
46
47
def _write_header(f, suffix, type_name, mem_type, vec_sizes, addr_space,
                  aligned):
    """Write the ``/*! ... !*/`` test description and extension pragma to f."""
    # NOTE: the "2,3,4,8,16" in the config name is fixed text; it is not
    # derived from vec_sizes.
    f.write(textwrap.dedent(("""\
    /*!
    [config]
    name: Vector store{suffix} {addr_space} {type_name}2,3,4,8,16
    clc_version_min: 11

    dimensions: 1
    global_size: 1 0 0
    """ + ext_req(type_name))
    .format(type_name=type_name, addr_space=addr_space, suffix=suffix)))

    # Sentinel placed last in both the input and the expected-output buffer.
    # It does not depend on the vector size, so compute it once.
    canary = '0xdeadp1' if type_name in ('float', 'double') else '0xdead'

    for s in vec_sizes:
        # '' stands for the non-vector variant and counts as one element.
        size = int(s) if s != '' else 1
        # Aligned 3-element stores are treated as occupying 4 elements.
        modsize = 4 if size == 3 and aligned else size
        # Number of leading zero elements expected before the stored data.
        offset = modsize if aligned else 1
        values = gen_array(size)

        f.write(textwrap.dedent("""
        [test]
        name: vector store{suffix} {addr_space} {type_name}
        kernel_name: vstore{suffix}{n}_{addr_space}
        arg_out: 0 buffer {mem_type}[{size}] {offset_zeros}{gen_array} {canary}
        arg_in: 0 buffer {mem_type}[{size}] {offset_size_zeros} {canary}
        arg_in:  1 buffer {type_name}[1] {gen_array}

        [test]
        name: vector store{suffix} {addr_space} offset {type_name}
        kernel_name: vstore{suffix}{n}_{addr_space}_offset
        arg_out: 0 buffer {mem_type}[{offset_size}] {offset_zeros} {gen_array} {padd_zeros} {gen_array} {canary}
        arg_in: 0 buffer {mem_type}[{offset_size}] {offset_modsize_size_zeros} {canary}
        arg_in: 1 buffer {type_name}[1] {gen_array}
        """.format(type_name=type_name + s, mem_type=mem_type,
                   size=size + offset + 1,
                   offset_zeros=("0 " * offset),
                   offset_size_zeros=("0 " * (offset + size)),
                   padd_zeros=("0 " * (modsize - size)),
                   offset_modsize_size_zeros=("0 " * (modsize + size + offset)),
                   offset_size=modsize + size + offset + 1, n=s,
                   gen_array=values,
                   suffix=suffix, addr_space=addr_space,
                   canary=canary)))

    f.write(textwrap.dedent("""
    !*/
    """))
    if type_name == "double":
        f.write(textwrap.dedent("""
        #pragma OPENCL EXTENSION cl_khr_fp64: enable
        """))
    if type_name == "half":
        f.write(textwrap.dedent("""
        #pragma OPENCL EXTENSION cl_khr_fp16: enable
        """))


def begin_test(suffix, type_name, mem_type, vec_sizes, addr_space, aligned):
    """Create the .cl file for one test group and write its header.

    The file is named ``vstore<suffix>-<type_name>-<addr_space>.cl`` inside
    DIR_NAME and its path is printed.  The header consists of a ``/*! ... !*/``
    comment holding one [config] section and two [test] sections (plain and
    "offset") per entry of ``vec_sizes``, followed by the matching
    ``#pragma OPENCL EXTENSION`` line when ``type_name`` is "double" or
    "half".

    Args:
        suffix: text inserted after "vstore" in file, test and kernel names.
        type_name: scalar type of the value being stored.
        mem_type: element type used for the destination buffer.
        vec_sizes: vector widths as strings; '' is treated as one element.
        addr_space: address space name used in file, test and kernel names.
        aligned: when true, the leading offset equals the vector's element
            count and 3-element vectors are treated as 4 elements wide.

    Returns:
        The file object, still open for writing; the caller must close it.
    """
    file_name = os.path.join(DIR_NAME, "vstore{}-{}-{}.cl".format(suffix, type_name, addr_space))
    print(file_name)
    f = open(file_name, 'w')
    try:
        _write_header(f, suffix, type_name, mem_type, vec_sizes, addr_space,
                      aligned)
    except BaseException:
        # The caller never receives the handle on failure, so release it here
        # rather than leaving it to the garbage collector.
        f.close()
        raise
    return f
105
106
def gen_test_global(suffix, t, mem_type, vec_sizes, aligned):
    """Generate the test file whose kernels store to a global buffer.

    For every entry of ``vec_sizes`` two kernels are appended after the
    header written by begin_test(): one that calls vstore once with index 0,
    and an ``_offset`` variant that calls it with index 0 and index 1.  Both
    pass ``out + offset`` as the destination pointer.

    Args:
        suffix: text inserted after "vstore" in function and kernel names.
        t: scalar type of the value being stored.
        mem_type: element type of the destination buffer.
        vec_sizes: vector widths as strings; '' is treated as one element.
        aligned: when true, the pointer offset equals the vector's element
            count (4 for 3-element vectors); otherwise it is 1.
    """
    # The substituted values contain no newlines, so the template can be
    # dedented once up front instead of once per vector size.
    kernels = textwrap.dedent("""
        kernel void vstore{suffix}{n}_global(global {mem_type} *out,
                                     global {type_name} *in) {{
            {type_name} tmp = in[0];
            vstore{suffix}{n}(tmp, 0, out + {offset});
        }}

        kernel void vstore{suffix}{n}_global_offset(global {mem_type} *out,
                                            global {type_name} *in) {{
            {type_name} tmp = in[0];
            vstore{suffix}{n}(tmp, 0, out + {offset});
            vstore{suffix}{n}(tmp, 1, out + {offset});
        }}
        """)

    # The context manager closes the file even if a write fails part-way.
    with begin_test(suffix, t, mem_type, vec_sizes, 'global', aligned) as f:
        for s in vec_sizes:
            # Same derivation as begin_test(), so the kernels agree with the
            # expected buffers described in the header.
            size = int(s) if s != '' else 1
            modsize = 4 if size == 3 and aligned else size
            offset = modsize if aligned else 1

            f.write(kernels.format(type_name=t + s, mem_type=mem_type, n=s,
                                   suffix=suffix, offset=offset))
131
132
def gen_test_local_private(suffix, t, mem_type, vec_sizes, addr_space, aligned):
    """Generate the test file whose kernels store to a local/private array.

    For every entry of ``vec_sizes`` two kernels are appended after the
    header written by begin_test().  Each kernel zero-fills a ``volatile``
    array declared in ``addr_space``, calls vstore on it (index 0 only, or
    index 0 and 1 for the ``_offset`` variant) and then copies the whole
    array to the global output buffer.

    Args:
        suffix: text inserted after "vstore" in function and kernel names.
        t: scalar type of the value being stored.
        mem_type: element type of the scratch array and the output buffer.
        vec_sizes: vector widths as strings; '' is treated as one element.
        addr_space: address space qualifier of the scratch array.
        aligned: when true, the pointer offset equals the vector's element
            count and 3-element vectors are treated as 4 elements wide.
    """
    # The substituted values contain no newlines, so the template can be
    # dedented once up front instead of once per vector size.
    kernels = textwrap.dedent("""
        kernel void vstore{suffix}{n}_{addr_space}(global {mem_type} *out,
                                     global {type_name} *in) {{
            {type_name} tmp = in[0];
            volatile {addr_space} {mem_type} loc[{size}];
            for (int i = 0; i < {size}; ++i)
                loc[i] = ({mem_type})0;

            vstore{suffix}{n}(tmp, 0, ({addr_space} {mem_type}*)loc + {offset});
            for (int i = 0; i < {size}; ++i)
                out[i] = loc[i];
        }}

        kernel void vstore{suffix}{n}_{addr_space}_offset(global {mem_type} *out,
                                            global {type_name} *in) {{
            {type_name} tmp = in[0];
            volatile {addr_space} {mem_type} loc[{offset_size}];
            for (int i = 0; i < {offset_size}; ++i)
                loc[i] = ({mem_type})0;

            vstore{suffix}{n}(tmp, 0, ({addr_space} {mem_type}*)loc + {offset});
            vstore{suffix}{n}(tmp, 1, ({addr_space} {mem_type}*)loc + {offset});
            for (int i = 0; i < {offset_size}; ++i)
                out[i] = loc[i];
        }}
        """)

    # The context manager closes the file even if a write fails part-way.
    with begin_test(suffix, t, mem_type, vec_sizes, addr_space, aligned) as f:
        for s in vec_sizes:
            size = int(s) if s != '' else 1
            # Aligned 3-element stores are treated as occupying 4 elements.
            modsize = 4 if size == 3 and aligned else size
            offset = modsize if aligned else 1

            f.write(kernels.format(
                type_name=t + s, mem_type=mem_type, n=s, suffix=suffix,
                # Array lengths exclude the trailing canary element that the
                # header adds to the output buffer.
                offset_size=size + modsize + offset, size=size + offset,
                addr_space=addr_space, offset=offset))
171
172
173# vstore_half is special, because CLC won't allow us to use half type without
174# cl_khr_fp16
def gen_test_local_private_half(suffix, t, vec_sizes, addr_space, aligned):
    """Generate the local/private test file for stores to half memory.

    Same layout as the generic local/private generator, except that the
    scratch array is declared as ``short`` and cast to ``half*`` for the
    vstore call, and the output buffer is written through a ``short``
    pointer, so the kernels never declare a half variable.

    The file returned by begin_test() is closed on exit; previously it was
    left open.

    Args:
        suffix: text inserted after "vstore" in function and kernel names.
        t: scalar type of the value being stored.
        vec_sizes: vector widths as strings; '' is treated as one element.
        addr_space: address space qualifier of the scratch array.
        aligned: when true, the pointer offset equals the vector's element
            count and 3-element vectors are treated as 4 elements wide.
    """
    # The substituted values contain no newlines, so the template can be
    # dedented once up front instead of once per vector size.
    kernels = textwrap.dedent("""
        kernel void vstore{suffix}{n}_{addr_space}(global half *out,
                                     global {type_name} *in) {{
            {type_name} tmp = in[0];
            volatile {addr_space} short loc[{size}];
            for (int i = 0; i < {size}; ++i)
                loc[i] = 0;

            vstore{suffix}{n}(tmp, 0, ({addr_space} half*)loc + {offset});

            for (int i = 0; i < {size}; ++i)
                ((global short *)out)[i] = loc[i];
        }}

        kernel void vstore{suffix}{n}_{addr_space}_offset(global half *out,
                                            global {type_name} *in) {{
            {type_name} tmp = in[0];
            volatile {addr_space} short loc[{offset_size}];
            for (int i = 0; i < {offset_size}; ++i)
                loc[i] = 0;

            vstore{suffix}{n}(tmp, 0, ({addr_space} half*)loc + {offset});
            vstore{suffix}{n}(tmp, 1, ({addr_space} half*)loc + {offset});

            for (int i = 0; i < {offset_size}; ++i)
                ((global short *)out)[i] = loc[i];
        }}
        """)

    # The context manager guarantees the file is flushed and closed, also
    # when a write fails part-way.
    with begin_test(suffix, t, 'half', vec_sizes, addr_space, aligned) as f:
        for s in vec_sizes:
            size = int(s) if s != '' else 1
            # Aligned 3-element stores are treated as occupying 4 elements.
            modsize = 4 if size == 3 and aligned else size
            offset = modsize if aligned else 1

            f.write(kernels.format(
                type_name=t + s, n=s, suffix=suffix,
                offset_size=size + modsize + offset, size=size + offset,
                addr_space=addr_space, offset=offset))
213
214
def gen_test_local(suffix, t, mem_type, vec_sizes, aligned):
    """Generate the 'local' address space test file for value type ``t``.

    A ``mem_type`` of 'half' is routed to the half-specific generator; every
    other memory type goes through the generic local/private generator.
    """
    if mem_type != 'half':
        gen_test_local_private(suffix, t, mem_type, vec_sizes, 'local', aligned)
        return
    gen_test_local_private_half(suffix, t, vec_sizes, 'local', aligned)
220
221
def gen_test_private(suffix, t, mem_type, vec_sizes, aligned):
    """Generate the 'private' address space test file for value type ``t``.

    A ``mem_type`` of 'half' is routed to the half-specific generator; every
    other memory type goes through the generic local/private generator.
    """
    if mem_type != 'half':
        gen_test_local_private(suffix, t, mem_type, vec_sizes, 'private', aligned)
        return
    gen_test_local_private_half(suffix, t, vec_sizes, 'private', aligned)
227
228
def main():
    """Create DIR_NAME and generate every vstore test file.

    First the plain vstoreN tests are produced for each entry of TYPES in
    the global, local and private address spaces.  Then the "_half" and
    "a_half" variants are produced for float and double values stored to
    half memory; only the "_half" variant also covers the '' vector size.
    """
    utils.safe_makedirs(DIR_NAME)
    generators = (gen_test_global, gen_test_local, gen_test_private)

    for t in TYPES:
        for generate in generators:
            generate('', t, t, VEC_SIZES, False)

    for aligned in (False, True):
        if aligned:
            suffix, vec_sizes = "a_half", VEC_SIZES
        else:
            suffix, vec_sizes = "_half", [''] + VEC_SIZES

        for generate in generators:
            for value_type in ('float', 'double'):
                generate(suffix, value_type, 'half', vec_sizes, aligned)


if __name__ == '__main__':
    main()
250