# OpenCL built-in library: type conversion functions
#
# Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
# Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

# This script generates the file convert_type.cl, which contains all of the
# OpenCL functions in the form:
#
# convert_<destTypen><_sat><_roundingMode>(<sourceTypen>)

# Scalar OpenCL types covered by the generated conversions. `types` is
# assembled from the integer and floating-point lists so the three lists
# cannot drift apart.
int_types = ['char', 'uchar', 'short', 'ushort', 'int', 'uint', 'long', 'ulong']
float_types = ['float', 'double']
types = int_types + float_types
unsigned_types = ['uchar', 'ushort', 'uint', 'ulong']

# Types whose presence in a conversion causes conditional_guard() to wrap the
# generated code in a preprocessor conditional.
int64_types = ['long', 'ulong']
float64_types = ['double']

# Vector width suffixes ('' is the scalar case), and each power-of-two width
# paired with the width of its .lo/.hi halves.
vector_sizes = ['', '2', '3', '4', '8', '16']
half_sizes = [('2', ''), ('4', '2'), ('8', '4'), ('16', '8')]

# Function-name suffixes.
saturation = ['', '_sat']
rounding_modes = ['_rtz', '_rte', '_rtp', '_rtn']

# Per-floating-type name fragments. NOTE(review): not referenced by the rest
# of this script as far as visible; presumably the <float.h>-style macro
# prefix and the literal suffix of each type.
float_prefix = {'float': 'FLT_', 'double': 'DBL_'}
float_suffix = {'float': 'f', 'double': ''}

# Signed integer type with the same width as each type. Comparison results
# are converted to this type before being handed to select().
bool_type = {
  'char': 'char',
  'uchar': 'char',
  'short': 'short',
  'ushort': 'short',
  'int': 'int',
  'uint': 'int',
  'long': 'long',
  'ulong': 'long',
  'float': 'int',
  'double': 'long',
}

# Unsigned counterpart of each integer type (the type used to hold abs()).
unsigned_type = {
  'char': 'uchar',
  'uchar': 'uchar',
  'short': 'ushort',
  'ushort': 'ushort',
  'int': 'uint',
  'uint': 'uint',
  'long': 'ulong',
  'ulong': 'ulong',
}

# Size of each type in bytes.
sizeof_type = {
  'char': 1, 'uchar': 1,
  'short': 2, 'ushort': 2,
  'int': 4, 'uint': 4,
  'long': 8, 'ulong': 8,
  'float': 4, 'double': 8,
}

# Text emitted for the largest value of each integer type.
limit_max = {
  'char': 'CHAR_MAX',
  'uchar': 'UCHAR_MAX',
  'short': 'SHRT_MAX',
  'ushort': 'USHRT_MAX',
  'int': 'INT_MAX',
  'uint': 'UINT_MAX',
  'long': 'LONG_MAX',
  'ulong': 'ULONG_MAX',
}

# Text emitted for the smallest value of each integer type.
limit_min = {
  'char': 'CHAR_MIN',
  'uchar': '0',
  'short': 'SHRT_MIN',
  'ushort': '0',
  'int': 'INT_MIN',
  'uint': '0',
  'long': 'LONG_MIN',
  'ulong': '0',
}

def conditional_guard(src, dst):
  """Print the opening preprocessor guard needed by a src -> dst conversion.

  A conversion that involves a type from float64_types is wrapped in
  "#ifdef cl_khr_fp64"; otherwise, a conversion that involves a type from
  int64_types is wrapped in a check for 64-bit integer support. The
  double-precision guard takes precedence when both apply.

  Returns True when a guard line was printed, in which case the caller must
  print the matching "#endif" after the generated code, and False when no
  guard is needed.
  """
  operands = (src, dst)
  if any(t in float64_types for t in operands):
    # In the embedded profile, a device that supports cl_khr_fp64 also has to
    # support cles_khr_int64, so this single guard covers 64-bit integers too.
    print("#ifdef cl_khr_fp64")
    return True
  if any(t in int64_types for t in operands):
    print("#if defined cles_khr_int64 || !defined(__EMBEDDED_PROFILE__)")
    return True
  return False


# Emit the banner, license text and extension pragmas that open the
# generated file.
print("""/* !!!! AUTOGENERATED FILE generated by convert_type.py !!!!!

   DON'T CHANGE THIS FILE. MAKE YOUR CHANGES TO convert_type.py AND RUN:
   $ ./generate-conversion-type-cl.sh

   OpenCL type conversion functions

   Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
   Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>

   Permission is hereby granted, free of charge, to any person obtaining a copy
   of this software and associated documentation files (the "Software"), to deal
   in the Software without restriction, including without limitation the rights
   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   copies of the Software, and to permit persons to whom the Software is
   furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be included in
   all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
   THE SOFTWARE.
*/

#include <clc/clc.h>

#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

#if defined(__EMBEDDED_PROFILE__) && !defined(cles_khr_int64)
#error Embedded profile that supports cl_khr_fp64 also has to support cles_khr_int64
#endif

#endif

#ifdef cles_khr_int64
#pragma OPENCL EXTENSION cles_khr_int64 : enable
#endif

""")

#
# Default Conversions
#
# All conversions are in accordance with the OpenCL specification,
# which cites the C99 conversion rules.
#
# Casting from floating point to integer results in conversions
# with truncation, so it should be suitable for the default convert
# functions.
#
# Conversions from integer to floating-point, and from floating-point
# to floating-point, through casting are done with the default rounding
# mode. While C99 allows dynamically changing the rounding mode
# during runtime, it is not a supported feature in OpenCL according
# to Section 7.1 - Rounding Modes in the OpenCL 1.2 specification.
#
# Therefore, we can assume for optimization purposes that the
# rounding mode is fixed to round-to-nearest-even. Platform target
# authors should ensure that the rounding-control registers remain
# in this state, and that this invariant holds.
#
# Also note, even though the OpenCL specification isn't entirely
# clear on this matter, we implement all rounding mode combinations
# even for integer-to-integer conversions. When such a conversion
# is used, the rounding mode is ignored.
#

def generate_default_conversion(src, dst, mode):
  """Print the cast-based convert_<dst><n><mode> overloads for src -> dst.

  Emits, in order: the scalar overload, one overload per entry of half_sizes
  (each built from two half-width conversions of x.lo and x.hi), and the
  3-component overload (built from a 2-wide and a scalar conversion).

  `mode` is the rounding-mode suffix appended to the emitted function name
  ('' for the unsuffixed functions). It does not change the emitted body:
  the scalar body is a plain cast and the vector bodies always call the
  unsuffixed conversions.

  The whole group is wrapped in the guard chosen by conditional_guard().
  """
  close_conditional = conditional_guard(src, dst)
  names = dict(SRC=src, DST=dst, M=mode)

  # Scalar conversion: a plain C-style cast.
  print("""_CLC_DEF _CLC_OVERLOAD
{DST} convert_{DST}{M}({SRC} x)
{{
  return ({DST})x;
}}
""".format(**names))

  # Power-of-two vectors: convert the low and high halves separately.
  for size, half_size in half_sizes:
    print("""_CLC_DEF _CLC_OVERLOAD
{DST}{N} convert_{DST}{N}{M}({SRC}{N} x)
{{
  return ({DST}{N})(convert_{DST}{H}(x.lo), convert_{DST}{H}(x.hi));
}}
""".format(N=size, H=half_size, **names))

  # 3-component vectors: a 2-wide conversion plus a scalar conversion.
  print("""_CLC_DEF _CLC_OVERLOAD
{DST}3 convert_{DST}3{M}({SRC}3 x)
{{
  return ({DST}3)(convert_{DST}2(x.s01), convert_{DST}(x.s2));
}}""".format(**names))

  if close_conditional:
    print("#endif")


# Unsuffixed conversions between every ordered pair of types.
for src in types:
  for dst in types:
    generate_default_conversion(src, dst, '')

# Rounding-mode-suffixed variants for integer-to-integer conversions; the
# mode only changes the emitted function name.
for src in int_types:
  for dst in int_types:
    for mode in rounding_modes:
      generate_default_conversion(src, dst, mode)

#
# Saturated Conversions To Integers
#
# These functions are dependent on the unsaturated conversion functions
# generated above, and use clamp, max, min, and select to eliminate
# branching and vectorize the conversions.
#
# Again, as above, we allow all rounding modes for integer-to-integer
# conversions with saturation.
#

def generate_saturated_conversion(src, dst, size):
  """Print the convert_<dst><size>_sat(<src><size>) overload.

  `dst` has to be an integer type (limit_min/limit_max only have integer
  entries); `size` is a vector width suffix, '' for scalars.

  The emitted body depends on the source type:
    * same type: returns x unchanged;
    * floating-point source: converts first, then uses select() to replace
      the result with the destination limit wherever x is out of range;
    * integer source: limits x in the source type with min/max/clamp where
      the destination range is narrower, then calls the unsaturated
      conversion.

  The function is wrapped in the guard chosen by conditional_guard().
  """
  # Header
  close_conditional = conditional_guard(src, dst)
  print("""_CLC_DEF _CLC_OVERLOAD
{DST}{N} convert_{DST}{N}_sat({SRC}{N} x)
{{""".format(DST=dst, SRC=src, N=size))

  # FIXME: This is a work around for lack of select function with
  # signed third argument when the first two arguments are unsigned types.
  # We cast to the signed type for sign-extension, then do a bitcast to
  # the unsigned type.
  if dst in unsigned_types:
    bool_prefix = "as_{DST}{N}(convert_{BOOL}{N}".format(DST=dst, BOOL=bool_type[dst], N=size)
    bool_suffix = ")"
  else:
    bool_prefix = "convert_{BOOL}{N}".format(BOOL=bool_type[dst], N=size)
    bool_suffix = ""

  # Body
  if src == dst:

    # Conversion between same types
    print("  return x;")

  elif src in float_types:

    # Conversion from float to int.
    #
    # The upper bound is tested with >= rather than >: limits such as
    # INT_MAX, LONG_MAX or ULONG_MAX are not exactly representable in the
    # source floating-point type and round up to the next power of two when
    # cast. An input equal to that rounded value is already out of range for
    # the plain conversion, so it must saturate as well. For limits that are
    # exactly representable the result is the same either way.
    print("""  {DST}{N} y = convert_{DST}{N}(x);
  y = select(y, ({DST}{N}){DST_MIN}, {BP}(x < ({SRC}{N}){DST_MIN}){BS});
  y = select(y, ({DST}{N}){DST_MAX}, {BP}(x >= ({SRC}{N}){DST_MAX}){BS});
  return y;""".format(SRC=src, DST=dst, N=size,
      DST_MIN=limit_min[dst], DST_MAX=limit_max[dst],
      BP=bool_prefix, BS=bool_suffix))

  else:

    src_is_unsigned = src in unsigned_types

    # Integer to integer conversion with sizeof(src) == sizeof(dst); the two
    # types differ only in signedness here because src != dst.
    if sizeof_type[src] == sizeof_type[dst]:
      if src_is_unsigned:
        print("  x = min(x, ({SRC}){DST_MAX});".format(SRC=src, DST_MAX=limit_max[dst]))
      else:
        print("  x = max(x, ({SRC})0);".format(SRC=src))

    # Integer to integer conversion where sizeof(src) > sizeof(dst)
    elif sizeof_type[src] > sizeof_type[dst]:
      if src_is_unsigned:
        print("  x = min(x, ({SRC}){DST_MAX});".format(SRC=src, DST_MAX=limit_max[dst]))
      else:
        print("  x = clamp(x, ({SRC}){DST_MIN}, ({SRC}){DST_MAX});"
          .format(SRC=src, DST_MIN=limit_min[dst], DST_MAX=limit_max[dst]))

    # Integer to integer conversion where sizeof(src) < sizeof(dst): only a
    # signed source going to an unsigned destination can be out of range.
    elif not src_is_unsigned and dst in unsigned_types:
      print("  x = max(x, ({SRC})0);".format(SRC=src))

    print("  return convert_{DST}{N}(x);".format(DST=dst, N=size))

  # Footer
  print("}")
  if close_conditional:
    print("#endif")


# Saturated conversions from every type to every integer type, at every
# vector width.
for src in types:
  for dst in int_types:
    for size in vector_sizes:
      generate_saturated_conversion(src, dst, size)


def generate_saturated_conversion_with_rounding(src, dst, size, mode):
  """Print convert_<dst><size>_sat<mode>(<src><size>) as a plain forwarder.

  The emitted function only calls convert_<dst><size>_sat(x), so the
  rounding-mode suffix `mode` affects the function name and nothing else.
  The function is wrapped in the guard chosen by conditional_guard().
  """
  # Header
  close_conditional = conditional_guard(src, dst)

  # Body
  names = dict(DST=dst, SRC=src, N=size, M=mode)
  print("""_CLC_DEF _CLC_OVERLOAD
{DST}{N} convert_{DST}{N}_sat{M}({SRC}{N} x)
{{
  return convert_{DST}{N}_sat(x);
}}
""".format(**names))

  # Footer
  if close_conditional:
    print("#endif")


# Rounding-mode-suffixed saturated conversions between integer types.
for src in int_types:
  for dst in int_types:
    for size in vector_sizes:
      for mode in rounding_modes:
        generate_saturated_conversion_with_rounding(src, dst, size, mode)

#
# Conversions To/From Floating-Point With Rounding
#
# Note that we assume as above that casts from floating-point to
# integer are done with truncation, and that the default rounding
# mode is fixed to round-to-nearest-even, as per C99 and OpenCL
# rounding rules.
#
# These functions rely on the use of abs, ceil, fabs, floor,
# nextafter, sign, rint and the above generated conversion functions.
#
# Only conversions to integers can have saturation.
#

def generate_float_conversion(src, dst, size, mode, sat):
  """Print convert_<dst><size><sat><mode>(<src><size>) with explicit rounding.

  `mode` is one of the rounding_modes suffixes and `sat` is '' or '_sat'
  (`sat` only reaches the emitted body when `dst` is an integer type).

  Integer destination: x is first rounded with rint/ceil/floor for
  _rte/_rtp/_rtn (nothing for _rtz) and then passed to the unsuffixed
  conversion, which is assumed to truncate.

  Floating-point destination: _rte forwards to the unsuffixed conversion,
  which is assumed to round to nearest even. For the other modes the emitted
  code converts x to r, converts r back to the source type as y, and steps r
  one representable value with nextafter() when comparing y with x shows
  that the default rounding went the wrong way for the requested mode.

  The function is wrapped in the guard chosen by conditional_guard().
  """
  # Header
  close_conditional = conditional_guard(src, dst)
  print("""_CLC_DEF _CLC_OVERLOAD
{DST}{N} convert_{DST}{N}{S}{M}({SRC}{N} x)
{{""".format(SRC=src, DST=dst, N=size, M=mode, S=sat))

  # Perform conversion
  if dst in int_types:
    # Pre-round in the source type; '_rtz' has no entry because the plain
    # conversion already truncates.
    rounding_call = {'_rte': 'rint', '_rtp': 'ceil', '_rtn': 'floor'}.get(mode)
    if rounding_call is not None:
      print("  x = {F}(x);".format(F=rounding_call))
    print("  return convert_{DST}{N}{S}(x);".format(DST=dst, N=size, S=sat))
  elif mode == '_rte':
    print("  return convert_{DST}{N}(x);".format(DST=dst, N=size))
  else:
    print("  {DST}{N} r = convert_{DST}{N}(x);".format(DST=dst, N=size))
    print("  {SRC}{N} y = convert_{SRC}{N}(r);".format(SRC=src, N=size))
    if mode == '_rtz':
      # Compare magnitudes: step r towards zero when the round trip grew it.
      if src in int_types:
        print("  {USRC}{N} abs_x = abs(x);".format(USRC=unsigned_type[src], N=size))
        print("  {USRC}{N} abs_y = abs(y);".format(USRC=unsigned_type[src], N=size))
      else:
        print("  {SRC}{N} abs_x = fabs(x);".format(SRC=src, N=size))
        print("  {SRC}{N} abs_y = fabs(y);".format(SRC=src, N=size))
      print("  return select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));"
        .format(DST=dst, N=size, BOOL=bool_type[dst]))
    elif mode == '_rtp':
      # Step r up when the round trip landed below x.
      print("  return select(r, nextafter(r, ({DST}{N})INFINITY), convert_{BOOL}{N}(y < x));"
        .format(DST=dst, N=size, BOOL=bool_type[dst]))
    elif mode == '_rtn':
      # Step r down when the round trip landed above x.
      print("  return select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));"
        .format(DST=dst, N=size, BOOL=bool_type[dst]))

  # Footer
  print("}")
  if close_conditional:
    print("#endif")


# Floating-point to integer conversions for every rounding mode, with and
# without saturation.
for src in float_types:
  for dst in int_types:
    for size in vector_sizes:
      for mode in rounding_modes:
        for sat in saturation:
          generate_float_conversion(src, dst, size, mode, sat)


# Conversions from any type to floating-point for every rounding mode; these
# never take the saturation suffix.
for src in types:
  for dst in float_types:
    for size in vector_sizes:
      for mode in rounding_modes:
        generate_float_conversion(src, dst, size, mode, '')
