# coding=utf-8
# Copyright 2016 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import os
import random
import textwrap

from modules import utils

TYPES = ['char', 'uchar', 'short', 'ushort', 'int', 'uint', 'long', 'ulong',
         'half', 'float', 'double']
VEC_SIZES = ['2', '3', '4', '8', '16']

DIR_NAME = os.path.join("cl", "vstore")


def gen_array(size):
    """Return ``size`` space-separated pseudo-random integers in [0, 255].

    The module-level ``random`` generator is re-seeded with ``size`` on every
    call, so a given size always produces the same string.
    """
    random.seed(size)
    return ' '.join(str(random.randint(0, 255)) for _ in range(size))


def ext_req(type_name):
    """Return the ``require_device_extensions`` config line for a type.

    Names starting with "double" need cl_khr_fp64 and names starting with
    "half" need cl_khr_fp16; every other type yields an empty string.
    """
    if type_name.startswith("double"):
        return "require_device_extensions: cl_khr_fp64"
    if type_name.startswith("half"):
        return "require_device_extensions: cl_khr_fp16"
    return ""


def _layout(vec_size, aligned):
    """Return ``(size, modsize, offset)`` for one vector-size suffix.

    ``vec_size`` is the textual suffix ('' stands for a scalar, i.e. size 1).
    ``size`` is the number of components, ``modsize`` is the per-store stride
    (a 3-component vector is widened to 4 for aligned stores) and ``offset``
    is the element offset added to the destination pointer: the stride for
    aligned stores, otherwise 1.
    NOTE(review): the 3 -> 4 widening presumably mirrors the stride of the
    aligned 3-component store built-ins -- confirm against the OpenCL C spec.
    """
    size = int(vec_size) if vec_size != '' else 1
    modsize = 4 if (aligned and size == 3) else size
    offset = modsize if aligned else 1
    return size, modsize, offset


def begin_test(suffix, type_name, mem_type, vec_sizes, addr_space, aligned):
    """Create a test file, write its config comment and return it open.

    Writes the ``[config]`` section, two ``[test]`` sections (plain and
    "offset") per entry of ``vec_sizes``, the closing ``!*/`` and, for
    ``double``/``half`` inputs, the matching ``#pragma OPENCL EXTENSION``.

    The returned file object is still open for writing; the caller appends
    the kernels and is responsible for closing it.  If writing the header
    fails, the file is closed here before the exception propagates.
    """
    file_name = os.path.join(
        DIR_NAME, "vstore{}-{}-{}.cl".format(suffix, type_name, addr_space))
    print(file_name)
    # Sentinel placed after the data in every buffer.
    canary = '0xdeadp1' if type_name in ('float', 'double') else '0xdead'

    f = open(file_name, 'w')
    try:
        # The extension requirement (possibly empty) is appended after
        # dedenting so the result does not depend on how the literal's last
        # line happens to be indented.
        f.write(textwrap.dedent("""\
            /*!
            [config]
            name: Vector store{suffix} {addr_space} {type_name}2,3,4,8,16
            clc_version_min: 11

            dimensions: 1
            global_size: 1 0 0
            """).format(type_name=type_name, addr_space=addr_space,
                        suffix=suffix) + ext_req(type_name))

        for s in vec_sizes:
            size, modsize, offset = _layout(s, aligned)
            f.write(textwrap.dedent("""
                [test]
                name: vector store{suffix} {addr_space} {type_name}
                kernel_name: vstore{suffix}{n}_{addr_space}
                arg_out: 0 buffer {mem_type}[{size}] {offset_zeros}{gen_array} {canary}
                arg_in: 0 buffer {mem_type}[{size}] {offset_size_zeros} {canary}
                arg_in: 1 buffer {type_name}[1] {gen_array}

                [test]
                name: vector store{suffix} {addr_space} offset {type_name}
                kernel_name: vstore{suffix}{n}_{addr_space}_offset
                arg_out: 0 buffer {mem_type}[{offset_size}] {offset_zeros} {gen_array} {padd_zeros} {gen_array} {canary}
                arg_in: 0 buffer {mem_type}[{offset_size}] {offset_modsize_size_zeros} {canary}
                arg_in: 1 buffer {type_name}[1] {gen_array}
                """).format(
                    type_name=type_name + s,
                    mem_type=mem_type,
                    # +1 leaves room for the trailing canary element.
                    size=size + offset + 1,
                    offset_zeros="0 " * offset,
                    offset_size_zeros="0 " * (offset + size),
                    padd_zeros="0 " * (modsize - size),
                    offset_modsize_size_zeros="0 " * (modsize + size + offset),
                    offset_size=modsize + size + offset + 1,
                    n=s,
                    gen_array=gen_array(size),
                    suffix=suffix,
                    addr_space=addr_space,
                    canary=canary))

        f.write("\n!*/\n")
        if type_name == "double":
            f.write("\n#pragma OPENCL EXTENSION cl_khr_fp64: enable\n")
        if type_name == "half":
            f.write("\n#pragma OPENCL EXTENSION cl_khr_fp16: enable\n")
    except Exception:
        # Do not leak the handle when the header could not be written.
        f.close()
        raise
    return f


def gen_test_global(suffix, t, mem_type, vec_sizes, aligned):
    """Generate the test file whose kernels store directly to global memory.

    For every vector size two kernels are emitted: one performing a single
    store at index 0 and an ``_offset`` variant storing at indices 0 and 1.
    """
    with begin_test(suffix, t, mem_type, vec_sizes, 'global', aligned) as f:
        for s in vec_sizes:
            _, _, offset = _layout(s, aligned)
            f.write(textwrap.dedent("""
                kernel void vstore{suffix}{n}_global(global {mem_type} *out,
                                                     global {type_name} *in) {{
                    {type_name} tmp = in[0];
                    vstore{suffix}{n}(tmp, 0, out + {offset});
                }}

                kernel void vstore{suffix}{n}_global_offset(global {mem_type} *out,
                                                            global {type_name} *in) {{
                    {type_name} tmp = in[0];
                    vstore{suffix}{n}(tmp, 0, out + {offset});
                    vstore{suffix}{n}(tmp, 1, out + {offset});
                }}
                """).format(type_name=t + s, mem_type=mem_type, n=s,
                            suffix=suffix, offset=offset))


def gen_test_local_private(suffix, t, mem_type, vec_sizes, addr_space, aligned):
    """Generate the test file for stores into ``addr_space`` memory.

    Each kernel zeroes a scratch array declared in ``addr_space``, stores the
    input vector into it and then copies the whole array to the global output
    buffer.
    """
    with begin_test(suffix, t, mem_type, vec_sizes, addr_space, aligned) as f:
        for s in vec_sizes:
            size, modsize, offset = _layout(s, aligned)
            f.write(textwrap.dedent("""
                kernel void vstore{suffix}{n}_{addr_space}(global {mem_type} *out,
                                                           global {type_name} *in) {{
                    {type_name} tmp = in[0];
                    volatile {addr_space} {mem_type} loc[{size}];
                    for (int i = 0; i < {size}; ++i)
                        loc[i] = ({mem_type})0;

                    vstore{suffix}{n}(tmp, 0, ({addr_space} {mem_type}*)loc + {offset});
                    for (int i = 0; i < {size}; ++i)
                        out[i] = loc[i];
                }}

                kernel void vstore{suffix}{n}_{addr_space}_offset(global {mem_type} *out,
                                                                  global {type_name} *in) {{
                    {type_name} tmp = in[0];
                    volatile {addr_space} {mem_type} loc[{offset_size}];
                    for (int i = 0; i < {offset_size}; ++i)
                        loc[i] = ({mem_type})0;

                    vstore{suffix}{n}(tmp, 0, ({addr_space} {mem_type}*)loc + {offset});
                    vstore{suffix}{n}(tmp, 1, ({addr_space} {mem_type}*)loc + {offset});
                    for (int i = 0; i < {offset_size}; ++i)
                        out[i] = loc[i];
                }}
                """).format(type_name=t + s, mem_type=mem_type, n=s,
                            suffix=suffix,
                            offset_size=size + modsize + offset,
                            size=size + offset,
                            addr_space=addr_space, offset=offset))


# vstore_half is special, because CLC won't allow us to use half type without
# cl_khr_fp16
def gen_test_local_private_half(suffix, t, vec_sizes, addr_space, aligned):
    """Generate the ``addr_space`` test file for stores to ``half`` memory.

    Same kernel shape as ``gen_test_local_private`` with ``half`` as the
    memory type, except that the scratch array is declared as ``short`` and
    the output pointer is cast to ``global short *`` for the copy, so the
    kernels never declare a ``half`` variable themselves.
    """
    with begin_test(suffix, t, 'half', vec_sizes, addr_space, aligned) as f:
        for s in vec_sizes:
            size, modsize, offset = _layout(s, aligned)
            f.write(textwrap.dedent("""
                kernel void vstore{suffix}{n}_{addr_space}(global half *out,
                                                           global {type_name} *in) {{
                    {type_name} tmp = in[0];
                    volatile {addr_space} short loc[{size}];
                    for (int i = 0; i < {size}; ++i)
                        loc[i] = 0;

                    vstore{suffix}{n}(tmp, 0, ({addr_space} half*)loc + {offset});

                    for (int i = 0; i < {size}; ++i)
                        ((global short *)out)[i] = loc[i];
                }}

                kernel void vstore{suffix}{n}_{addr_space}_offset(global half *out,
                                                                  global {type_name} *in) {{
                    {type_name} tmp = in[0];
                    volatile {addr_space} short loc[{offset_size}];
                    for (int i = 0; i < {offset_size}; ++i)
                        loc[i] = 0;

                    vstore{suffix}{n}(tmp, 0, ({addr_space} half*)loc + {offset});
                    vstore{suffix}{n}(tmp, 1, ({addr_space} half*)loc + {offset});

                    for (int i = 0; i < {offset_size}; ++i)
                        ((global short *)out)[i] = loc[i];
                }}
                """).format(type_name=t + s, n=s, suffix=suffix,
                            offset_size=size + modsize + offset,
                            size=size + offset,
                            addr_space=addr_space, offset=offset))


def gen_test_local(suffix, t, mem_type, vec_sizes, aligned):
    """Generate the ``local`` address-space test, picking the half variant
    when the memory type is ``half``."""
    if mem_type == 'half':
        gen_test_local_private_half(suffix, t, vec_sizes, 'local', aligned)
    else:
        gen_test_local_private(suffix, t, mem_type, vec_sizes, 'local', aligned)


def gen_test_private(suffix, t, mem_type, vec_sizes, aligned):
    """Generate the ``private`` address-space test, picking the half variant
    when the memory type is ``half``."""
    if mem_type == 'half':
        gen_test_local_private_half(suffix, t, vec_sizes, 'private', aligned)
    else:
        gen_test_local_private(suffix, t, mem_type, vec_sizes, 'private', aligned)


def main():
    """Generate every vstore, vstore_half and vstorea_half test file."""
    utils.safe_makedirs(DIR_NAME)
    generators = (gen_test_global, gen_test_local, gen_test_private)

    # Plain vstoreN: memory type and value type are the same.
    for t in TYPES:
        for generate in generators:
            generate('', t, t, VEC_SIZES, False)

    # vstore_half / vstorea_half: float or double values stored as half.
    for aligned in False, True:
        suffix = "a_half" if aligned else "_half"
        # Only the unaligned variant is also generated for the scalar ('')
        # size.
        vec_sizes = VEC_SIZES if aligned else [''] + VEC_SIZES

        for generate in generators:
            for t in ('float', 'double'):
                generate(suffix, t, 'half', vec_sizes, aligned)


if __name__ == '__main__':
    main()