# OpenCL built-in library: type conversion functions
#
# Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
# Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

# This script generates the file convert_type.cl, which contains all of the
# OpenCL functions in the form:
#
#   convert_<destTypen><_sat><_roundingMode>(<sourceTypen>)

# Scalar type families. `types` is every integer type followed by every
# floating-point type, in that order.
int_types = ["char", "uchar", "short", "ushort", "int", "uint", "long", "ulong"]
float_types = ["float", "double"]
types = int_types + float_types
unsigned_types = ["uchar", "ushort", "uint", "ulong"]

# Types whose use requires a preprocessor guard in the generated file.
int64_types = ["long", "ulong"]
float64_types = ["double"]

# Vector width suffixes ('' is the scalar form) and, for every width that is
# built from two halves, the (width, half width) pair.
vector_sizes = ["", "2", "3", "4", "8", "16"]
half_sizes = [("2", ""), ("4", "2"), ("8", "4"), ("16", "8")]

# Function-name suffixes.
saturation = ["", "_sat"]
rounding_modes = ["_rtz", "_rte", "_rtp", "_rtn"]

float_prefix = {"float": "FLT_", "double": "DBL_"}
float_suffix = {"float": "f", "double": ""}

# Signed integer type of the same width as the key; used for the mask
# argument of select().
bool_type = {
    "char": "char",
    "uchar": "char",
    "short": "short",
    "ushort": "short",
    "int": "int",
    "uint": "int",
    "long": "long",
    "ulong": "long",
    "float": "int",
    "double": "long",
}

# Unsigned counterpart of every integer type (unsigned types map to themselves).
unsigned_type = {}
for _name in int_types:
    unsigned_type[_name] = _name if _name in unsigned_types else "u" + _name
del _name

# Size in bytes of every scalar type.
sizeof_type = {
    "char": 1,
    "uchar": 1,
    "short": 2,
    "ushort": 2,
    "int": 4,
    "uint": 4,
    "long": 8,
    "ulong": 8,
    "float": 4,
    "double": 8,
}

# Spelling of the largest / smallest value of every integer type in the
# generated C code.
limit_max = {
    "char": "CHAR_MAX",
    "uchar": "UCHAR_MAX",
    "short": "SHRT_MAX",
    "ushort": "USHRT_MAX",
    "int": "INT_MAX",
    "uint": "UINT_MAX",
    "long": "LONG_MAX",
    "ulong": "ULONG_MAX",
}

limit_min = {
    "char": "CHAR_MIN",
    "uchar": "0",
    "short": "SHRT_MIN",
    "ushort": "0",
    "int": "INT_MIN",
    "uint": "0",
    "long": "LONG_MIN",
    "ulong": "0",
}


def conditional_guard(src, dst):
    """Print the opening preprocessor guard a src -> dst conversion needs.

    Returns True when a guard line was printed (the caller must then print
    the matching "#endif"), and False when neither type needs a guard.
    """
    involved = (src, dst)

    if any(name in float64_types for name in involved):
        # In embedded profile, if cl_khr_fp64 is supported cles_khr_int64 has
        # to be supported too, so the fp64 guard also covers 64-bit integers.
        print("#ifdef cl_khr_fp64")
        return True

    if any(name in int64_types for name in involved):
        print("#if defined cles_khr_int64 || !defined(__EMBEDDED_PROFILE__)")
        return True

    return False


print("""/* !!!! AUTOGENERATED FILE generated by convert_type.py !!!!!

   DON'T CHANGE THIS FILE. MAKE YOUR CHANGES TO convert_type.py AND RUN:
   $ ./generate-conversion-type-cl.sh

   OpenCL type conversion functions

   Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
   Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>

   Permission is hereby granted, free of charge, to any person obtaining a copy
   of this software and associated documentation files (the "Software"), to deal
   in the Software without restriction, including without limitation the rights
   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   copies of the Software, and to permit persons to whom the Software is
   furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be included in
   all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
   THE SOFTWARE.
*/

#include <clc/clc.h>

#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

#if defined(__EMBEDDED_PROFILE__) && !defined(cles_khr_int64)
#error Embedded profile that supports cl_khr_fp64 also has to support cles_khr_int64
#endif

#endif

#ifdef cles_khr_int64
#pragma OPENCL EXTENSION cles_khr_int64 : enable
#endif

""")

#
# Default Conversions
#
# All conversions are in accordance with the OpenCL specification,
# which cites the C99 conversion rules.
#
# Casting from floating point to integer results in conversions
# with truncation, so it should be suitable for the default convert
# functions.
#
# Conversions from integer to floating-point, and floating-point to
# floating-point through casting is done with the default rounding
# mode. While C99 allows dynamically changing the rounding mode
# during runtime, it is not a supported feature in OpenCL according
# to Section 7.1 - Rounding Modes in the OpenCL 1.2 specification.
#
# Therefore, we can assume for optimization purposes that the
# rounding mode is fixed to round-to-nearest-even. Platform target
# authors should ensure that the rounding-control registers remain
# in this state, and that this invariant holds.
#
# Also note, even though the OpenCL specification isn't entirely
# clear on this matter, we implement all rounding mode combinations
# even for integer-to-integer conversions. When such a conversion
# is used, the rounding mode is ignored.
#

def generate_default_conversion(src, dst, mode):
    """Print the plain (unsaturated) convert_<dst><mode> overloads for src.

    Emits the scalar overload as a C cast, the 2/4/8/16-wide overloads by
    converting the .lo and .hi halves, and the 3-wide overload by converting
    .s01 and .s2. `mode` is only appended to the generated function names;
    the bodies always call the mode-less conversions.

    The output is wrapped in the guard printed by conditional_guard() when
    that function reports one is needed.
    """
    close_conditional = conditional_guard(src, dst)

    # scalar conversions
    print("""_CLC_DEF _CLC_OVERLOAD
{DST} convert_{DST}{M}({SRC} x)
{{
  return ({DST})x;
}}
""".format(SRC=src, DST=dst, M=mode))

    # vector conversions, done through decomposition to components
    for size, half_size in half_sizes:
        print("""_CLC_DEF _CLC_OVERLOAD
{DST}{N} convert_{DST}{N}{M}({SRC}{N} x)
{{
  return ({DST}{N})(convert_{DST}{H}(x.lo), convert_{DST}{H}(x.hi));
}}
""".format(SRC=src, DST=dst, N=size, H=half_size, M=mode))

    # 3-component vector conversions
    print("""_CLC_DEF _CLC_OVERLOAD
{DST}3 convert_{DST}3{M}({SRC}3 x)
{{
  return ({DST}3)(convert_{DST}2(x.s01), convert_{DST}(x.s2));
}}""".format(SRC=src, DST=dst, M=mode))

    if close_conditional:
        print("#endif")


for src in types:
    for dst in types:
        generate_default_conversion(src, dst, '')

for src in int_types:
    for dst in int_types:
        for mode in rounding_modes:
            generate_default_conversion(src, dst, mode)

#
# Saturated Conversions To Integers
#
# These functions are dependent on the unsaturated conversion functions
# generated above, and use clamp, max, min, and select to eliminate
# branching and vectorize the conversions.
#
# Again, as above, we allow all rounding modes for integer-to-integer
# conversions with saturation.
#

def generate_saturated_conversion(src, dst, size):
    """Print convert_<dst><size>_sat(<src><size>) for an integer dst.

    src -- source scalar type name (integer or floating-point)
    dst -- destination integer type name
    size -- vector width suffix ('' for the scalar overload)

    The output is wrapped in the guard printed by conditional_guard() when
    that function reports one is needed.
    """
    # Header
    close_conditional = conditional_guard(src, dst)
    print("""_CLC_DEF _CLC_OVERLOAD
{DST}{N} convert_{DST}{N}_sat({SRC}{N} x)
{{""".format(DST=dst, SRC=src, N=size))

    # FIXME: This is a work around for lack of select function with
    # signed third argument when the first two arguments are unsigned types.
    # We cast to the signed type for sign-extension, then do a bitcast to
    # the unsigned type.
    if dst in unsigned_types:
        bool_prefix = "as_{DST}{N}(convert_{BOOL}{N}".format(DST=dst, BOOL=bool_type[dst], N=size)
        bool_suffix = ")"
    else:
        bool_prefix = "convert_{BOOL}{N}".format(BOOL=bool_type[dst], N=size)
        bool_suffix = ""

    # Body
    if src == dst:

        # Conversion between same types
        print("  return x;")

    elif src in float_types:

        # Conversion from float to int.
        #
        # The upper bound is tested with >= rather than >: (SRC)DST_MAX is not
        # always exactly representable in the source type and may round up to
        # one past the destination range (e.g. (float)INT_MAX is 2^31). An
        # input equal to that rounded value is out of range and must be
        # replaced by DST_MAX instead of relying on the unsaturated
        # conversion. When the bound is exact, >= selects DST_MAX for an input
        # that converts to DST_MAX anyway, so the result is unchanged.
        print("""  {DST}{N} y = convert_{DST}{N}(x);
  y = select(y, ({DST}{N}){DST_MIN}, {BP}(x < ({SRC}{N}){DST_MIN}){BS});
  y = select(y, ({DST}{N}){DST_MAX}, {BP}(x >= ({SRC}{N}){DST_MAX}){BS});
  return y;""".format(SRC=src, DST=dst, N=size,
                     DST_MIN=limit_min[dst], DST_MAX=limit_max[dst],
                     BP=bool_prefix, BS=bool_suffix))

    else:

        # Integer to integer conversion with sizeof(src) == sizeof(dst)
        if sizeof_type[src] == sizeof_type[dst]:
            if src in unsigned_types:
                print("  x = min(x, ({SRC}){DST_MAX});".format(SRC=src, DST_MAX=limit_max[dst]))
            else:
                print("  x = max(x, ({SRC})0);".format(SRC=src))

        # Integer to integer conversion where sizeof(src) > sizeof(dst)
        elif sizeof_type[src] > sizeof_type[dst]:
            if src in unsigned_types:
                print("  x = min(x, ({SRC}){DST_MAX});".format(SRC=src, DST_MAX=limit_max[dst]))
            else:
                print("  x = clamp(x, ({SRC}){DST_MIN}, ({SRC}){DST_MAX});"
                      .format(SRC=src, DST_MIN=limit_min[dst], DST_MAX=limit_max[dst]))

        # Integer to integer conversion where sizeof(src) < sizeof(dst):
        # only a signed source going to an unsigned destination can be out
        # of range (negative values).
        elif src not in unsigned_types and dst in unsigned_types:
            print("  x = max(x, ({SRC})0);".format(SRC=src))

        print("  return convert_{DST}{N}(x);".format(DST=dst, N=size))

    # Footer
    print("}")
    if close_conditional:
        print("#endif")


for src in types:
    for dst in int_types:
        for size in vector_sizes:
            generate_saturated_conversion(src, dst, size)


def generate_saturated_conversion_with_rounding(src, dst, size, mode):
    """Print convert_<dst><size>_sat<mode>(<src><size>) as a forwarding stub.

    The generated function simply calls convert_<dst><size>_sat(x); the
    rounding mode only appears in the function name.
    """
    # Header
    close_conditional = conditional_guard(src, dst)

    # Body
    print("""_CLC_DEF _CLC_OVERLOAD
{DST}{N} convert_{DST}{N}_sat{M}({SRC}{N} x)
{{
  return convert_{DST}{N}_sat(x);
}}
""".format(DST=dst, SRC=src, N=size, M=mode))

    # Footer
    if close_conditional:
        print("#endif")


for src in int_types:
    for dst in int_types:
        for size in vector_sizes:
            for mode in rounding_modes:
                generate_saturated_conversion_with_rounding(src, dst, size, mode)

#
# Conversions To/From Floating-Point With Rounding
#
# Note that we assume as above that casts from floating-point to
# integer are done with truncation, and that the default rounding
# mode is fixed to round-to-nearest-even, as per C99 and OpenCL
# rounding rules.
#
# These functions rely on the use of abs, ceil, fabs, floor,
# nextafter, sign, rint and the above generated conversion functions.
#
# Only conversions to integers can have saturation.
#

def generate_float_conversion(src, dst, size, mode, sat):
    """Print convert_<dst><size><sat><mode>(<src><size>) with explicit rounding.

    src, dst -- scalar type names; at least one side is expected to be a
                floating-point type by the driver loops below
    size -- vector width suffix ('' for the scalar overload)
    mode -- rounding-mode suffix ('_rtz', '_rte', '_rtp' or '_rtn')
    sat -- '' or '_sat'; only used when dst is an integer type

    The output is wrapped in the guard printed by conditional_guard() when
    that function reports one is needed.
    """
    # The guard (if any) is printed first; everything else is collected and
    # written in one go at the end.
    close_conditional = conditional_guard(src, dst)

    out = [
        "_CLC_DEF _CLC_OVERLOAD",
        "{DST}{N} convert_{DST}{N}{S}{M}({SRC}{N} x)".format(
            SRC=src, DST=dst, N=size, M=mode, S=sat),
        "{",
    ]

    if dst in int_types:
        # To integer: round in the source type, then use the truncating
        # (optionally saturating) conversion. '_rtz' needs no pre-rounding.
        pre_round = {'_rte': 'rint', '_rtp': 'ceil', '_rtn': 'floor'}.get(mode)
        if pre_round is not None:
            out.append("  x = {F}(x);".format(F=pre_round))
        out.append("  return convert_{DST}{N}{S}(x);".format(DST=dst, N=size, S=sat))
    elif mode == '_rte':
        # To floating-point, round-to-nearest-even: the default conversion.
        out.append("  return convert_{DST}{N}(x);".format(DST=dst, N=size))
    else:
        # To floating-point, directed rounding: convert, convert back, and
        # step the result one ulp when the round trip moved the wrong way.
        out.append("  {DST}{N} r = convert_{DST}{N}(x);".format(DST=dst, N=size))
        out.append("  {SRC}{N} y = convert_{SRC}{N}(r);".format(SRC=src, N=size))
        if mode == '_rtz':
            if src in int_types:
                mag_type, mag_fn = unsigned_type[src], "abs"
            else:
                mag_type, mag_fn = src, "fabs"
            for operand in ("x", "y"):
                out.append("  {T}{N} abs_{V} = {F}({V});".format(
                    T=mag_type, N=size, V=operand, F=mag_fn))
            out.append(
                "  return select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));"
                .format(DST=dst, N=size, BOOL=bool_type[dst]))
        elif mode == '_rtp':
            out.append(
                "  return select(r, nextafter(r, ({DST}{N})INFINITY), convert_{BOOL}{N}(y < x));"
                .format(DST=dst, N=size, BOOL=bool_type[dst]))
        elif mode == '_rtn':
            out.append(
                "  return select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));"
                .format(DST=dst, N=size, BOOL=bool_type[dst]))

    # Footer
    out.append("}")
    if close_conditional:
        out.append("#endif")

    print("\n".join(out))


# Floating-point to integer: every rounding mode, with and without saturation.
for src in float_types:
    for dst in int_types:
        for size in vector_sizes:
            for mode in rounding_modes:
                for sat in saturation:
                    generate_float_conversion(src, dst, size, mode, sat)


# Anything to floating-point: every rounding mode, never saturated.
for src in types:
    for dst in float_types:
        for size in vector_sizes:
            for mode in rounding_modes:
                generate_float_conversion(src, dst, size, mode, '')