1 /* OpenCL runtime library: pocl_util utility functions
2 
3    Copyright (c) 2012 Pekka Jääskeläinen / Tampere University of Technology
4 
5    Permission is hereby granted, free of charge, to any person obtaining a copy
6    of this software and associated documentation files (the "Software"), to deal
7    in the Software without restriction, including without limitation the rights
8    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9    copies of the Software, and to permit persons to whom the Software is
10    furnished to do so, subject to the following conditions:
11 
12    The above copyright notice and this permission notice shall be included in
13    all copies or substantial portions of the Software.
14 
15    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21    THE SOFTWARE.
22 */
23 
24 #ifndef POCL_UTIL_H
25 #define POCL_UTIL_H
26 
27 #include <stdint.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include "pocl_cl.h"
31 
32 #ifdef __cplusplus
33 extern "C" {
34 #endif
35 
/* Byte-order helpers for 32-bit integer and float values.
 * NOTE(review): should_swap presumably selects between returning the
 * byte-swapped value (non-zero) and returning `word` unchanged (zero);
 * the definitions are not in this header -- confirm there. */
uint32_t byteswap_uint32_t (uint32_t word, char should_swap);
float byteswap_float (float word, char should_swap);
38 
39 /* set rounding mode */
40 POCL_EXPORT
41 void pocl_restore_rm (unsigned rm);
42 /* get current rounding mode */
43 POCL_EXPORT
44 unsigned pocl_save_rm ();
45 /* set OpenCL's default (round to nearest) rounding mode */
46 POCL_EXPORT
47 void pocl_set_default_rm ();
48 
49 
/* sets the flush-denorms-to-zero flag on the CPU, if supported */
POCL_EXPORT
void pocl_set_ftz (unsigned ftz);
/* Saves / restores CPU flags.
 * NOTE(review): presumably the value returned by pocl_save_ftz () is the
 * one to hand back to pocl_restore_ftz () later -- confirm against the
 * implementation. */
POCL_EXPORT
unsigned pocl_save_ftz (void);
POCL_EXPORT
void pocl_restore_ftz (unsigned ftz);
58 
/* Signal handler installation (SIGFPE and SIGUSR2, going by the names).
 * Declared with (void) so that these are real prototypes: an empty
 * parameter list in C leaves the arguments unchecked. */
void pocl_install_sigfpe_handler (void);
void pocl_install_sigusr2_handler (void);
61 
/* Finds the next highest power of two of the given value.
 * pocl_size_ceil2 operates on size_t, pocl_size_ceil2_64 on uint64_t.
 * NOTE(review): the results for x == 0, for x that already is a power of
 * two, and for values whose next power of two does not fit the type are
 * not visible from this header -- check the definitions before relying
 * on them. */
size_t pocl_size_ceil2 (size_t x);
uint64_t pocl_size_ceil2_64 (uint64_t x);
65 
/* Allocates aligned blocks of memory.
 *
 * Uses posix_memalign when available. Otherwise, uses
 * malloc to allocate a block of memory on the heap of the desired
 * size which is then aligned with the given alignment. The resulting
 * pointer must be freed with a call to pocl_aligned_free. Alignment
 * must be a non-zero power of 2.
 *
 * Note the argument order: alignment first, then size.
 * pocl_aligned_free (x) is simply an alias for POCL_MEM_FREE (x).
 */

POCL_EXPORT
void *pocl_aligned_malloc(size_t alignment, size_t size);
#define pocl_aligned_free(x) POCL_MEM_FREE(x)
78 
/* Locks / unlocks two events in the order of their event ids.
 * Using one fixed order avoids the deadlock that could occur if two
 * threads tried to lock the same pair of events in opposite order. */
void pocl_lock_events_inorder (cl_event ev1, cl_event ev2);
void pocl_unlock_events_inorder (cl_event ev1, cl_event ev2);
84 
/* Function for creating events.
 * The new event is handed back through *event; the return value is a
 * cl_int status code.
 * NOTE(review): how `buffers` (num_buffers entries) and `context` are
 * used is not visible in this header. */
cl_int pocl_create_event (cl_event *event, cl_command_queue command_queue,
                          cl_command_type command_type, size_t num_buffers,
                          const cl_mem* buffers, cl_context context);

/* Creates a command node of the given command_type for command_queue and
 * hands it back through *cmd; returns a cl_int status code.
 * wait_list holds num_events events, buffers holds num_buffers memory
 * objects.
 * NOTE(review): readonly_flags presumably has one entry per buffer, and
 * `event` presumably receives the event associated with the command --
 * both are inferred from the signature only, confirm in the definition. */
cl_int pocl_create_command (_cl_command_node **cmd,
                            cl_command_queue command_queue,
                            cl_command_type command_type, cl_event *event,
                            cl_uint num_events, const cl_event *wait_list,
                            size_t num_buffers, cl_mem *buffers,
                            char *readonly_flags);

/* Variant of pocl_create_command that takes cl_mem_migration_flags instead
 * of a command type.
 * NOTE(review): presumably used for memory-object migration commands;
 * the remaining parameters mirror pocl_create_command -- confirm. */
cl_int pocl_create_command_migrate (_cl_command_node **cmd,
                                    cl_command_queue command_queue,
                                    cl_mem_migration_flags flags,
                                    cl_event *event_p,
                                    cl_uint num_events,
                                    const cl_event *wait_list,
                                    size_t num_buffers,
                                    cl_mem *buffers,
                                    char *readonly_flags);
106 
/* Enqueues an already created command node on command_queue. */
void pocl_command_enqueue (cl_command_queue command_queue,
                          _cl_command_node *node);

/* Host-pointer management for a memory object; both return an int status.
 * NOTE(review): going by the names, the first allocates the host-side
 * storage of `mem` or takes another reference to existing storage, and
 * the second drops such a reference -- the exact refcounting rules and
 * return values are not visible here. */
int pocl_alloc_or_retain_mem_host_ptr (cl_mem mem);

int pocl_release_mem_host_ptr (cl_mem mem);
113 
/* Does several sanity checks on a buffer and the given memory region
 * (offset, size) within it. */
int pocl_buffer_boundcheck(cl_mem buffer, size_t offset, size_t size);
/* Same as above, but for two buffers: a source region and a destination
 * region of the same size. */
int pocl_buffers_boundcheck(cl_mem src_buffer, cl_mem dst_buffer,
                            size_t src_offset, size_t dst_offset, size_t size);
/* Checks for overlapping regions in buffers, including overlapping
 * subbuffers. */
int pocl_buffers_overlap(cl_mem src_buffer, cl_mem dst_buffer,
                            size_t src_offset, size_t dst_offset, size_t size);

/* Bounds check for a 3D (origin, region) access into a buffer of
 * buffer_size bytes.
 * NOTE(review): row_pitch and slice_pitch are passed by non-const pointer,
 * so they may be updated by the callee (e.g. to fill in defaults), and
 * `prefix` is presumably used to label diagnostics -- confirm in the
 * definition. */
int pocl_buffer_boundcheck_3d(const size_t buffer_size, const size_t *origin,
                              const size_t *region, size_t *row_pitch,
                              size_t *slice_pitch, const char* prefix);

/* Overlap check for a 3D copy, given source and destination offsets, the
 * copied region and the row/slice pitches.
 * NOTE(review): the meaning of the return value (non-zero on overlap?) is
 * not visible in this header. */
int
check_copy_overlap(const size_t src_offset[3],
                   const size_t dst_offset[3],
                   const size_t region[3],
                   const size_t row_pitch, const size_t slice_pitch);
132 
/**
 * Push a command into ready_list if all previous events are completed, or
 * into pending_list if the command still has pending dependencies.
 */
POCL_EXPORT
void
pocl_command_push (_cl_command_node *node,
                   _cl_command_node **ready_list,
                   _cl_command_node **pending_list);

/* Post-processing hooks for a finished unmap command.
 * NOTE(review): what exactly is released or updated for the mapping is
 * not visible here; see the definitions. */
void pocl_unmap_command_finished (cl_device_id dev,
                                  pocl_mem_identifier *mem_id, cl_mem mem,
                                  mem_mapping_t *map);

/* Same purpose, taking the event and the command instead of the
 * individual device/memory/mapping arguments. */
void pocl_unmap_command_finished2 (cl_event event, _cl_command_t *cmd);
148 
149 /**
150  * Return true if a command is ready to execute (no more event in wait list
151  * or false if not
152  */
153 static inline int
pocl_command_is_ready(cl_event event)154 pocl_command_is_ready(cl_event event)
155 {
156   return event->wait_list == NULL;
157 }
158 
/* Callback type taking the command queue it is invoked for.
 * NOTE(review): judging by the name it is called when a queue becomes
 * empty; the caller is not visible in this header. */
typedef void (*empty_queue_callback) (cl_command_queue cq);

/* NOTE(review): presumably derives settings of `mem` from `flags` and,
 * for flags not given explicitly, from `from_buffer` -- confirm. */
void pocl_cl_mem_inherit_flags (cl_mem mem, cl_mem from_buffer,
                                cl_mem_flags flags);

void pocl_setup_context(cl_context context);

/* Helpers for dealing with devices / subdevices */

/* NOTE(review): presumably maps a sub-device to the device it was created
 * from and returns other devices unchanged -- confirm in the definition. */
cl_device_id pocl_real_dev (const cl_device_id);
/* Takes `num` devices in `in` and returns a device list, reporting a count
 * through *real.
 * NOTE(review): presumably the result contains each device only once and
 * is owned by the caller -- ownership is not visible here. */
cl_device_id * pocl_unique_device_list(const cl_device_id * in, cl_uint num, cl_uint *real);
/* Queries whether `dev` provides the built-in kernel called kernel_name. */
int pocl_device_supports_builtin_kernel (cl_device_id dev,
                                         const char *kernel_name);
172 
/* Passes the command queue's device through pocl_real_dev () and asserts
 * that the result is one of the devices of the queue's context.
 *
 * Expands in the caller's scope and relies on these names being visible
 * there: `command_queue`, an assignable `device` and an integer `i`.
 * Afterwards `device` holds the result of pocl_real_dev () and `i` the
 * index of its first match in command_queue->context->devices.  The
 * membership check is an assert (), so it disappears under NDEBUG. */
#define POCL_CHECK_DEV_IN_CMDQ                                                \
  do                                                                          \
    {                                                                         \
      device = pocl_real_dev (command_queue->device);                         \
      i = 0;                                                                  \
      while (i < command_queue->context->num_devices                          \
             && command_queue->context->devices[i] != device)                 \
        ++i;                                                                  \
      assert (i < command_queue->context->num_devices);                       \
    }                                                                         \
  while (0)
185 
/* Validates an event wait list (num_events_in_wait_list entries in
 * event_wait_list) for use with command_queue; returns an int status.
 * NOTE(review): the exact checks and the returned error codes are not
 * visible in this header. */
int pocl_check_event_wait_list(cl_command_queue     command_queue,
                               cl_uint              num_events_in_wait_list,
                               const cl_event *     event_wait_list);

/* Error hook for pthread calls: receives the call's status plus the line
 * and function of the call site.
 * NOTE(review): going by the name it aborts when status signals an error. */
void pocl_abort_on_pthread_error (int status, unsigned line, const char *func);
191 
/* Evaluates `code` once and forwards its value, together with the line and
 * function name of the call site, to pocl_abort_on_pthread_error ().
 * Uses the standard __func__ (as the other macros in this file do) rather
 * than the compiler-specific __FUNCTION__.
 * NOTE: the expansion already ends in ';' -- kept so existing call sites
 * keep compiling -- so do not use this as the unbraced body of an
 * if/else. */
#define PTHREAD_CHECK(code)                                                   \
  pocl_abort_on_pthread_error ((code), __LINE__, __func__);
194 
/* Event status updates.  Going by the names, each call moves `event` to
 * the corresponding execution status (queued, submitted, running).
 * NOTE(review): locking rules are not visible here; the "_unlocked"
 * suffix suggests that variant expects the caller to handle locking --
 * confirm in the definitions. */
void pocl_update_event_queued (cl_event event);

POCL_EXPORT
void pocl_update_event_submitted (cl_event event);

void pocl_update_event_running_unlocked (cl_event event);

POCL_EXPORT
void pocl_update_event_running (cl_event event);

/* Marks `event` as complete.  `func` and `line` identify the call site and
 * `msg` is an optional message (the POCL_UPDATE_EVENT_COMPLETE macro
 * passes NULL).  Normally used through the POCL_UPDATE_EVENT_COMPLETE*
 * macros, which fill in func/line automatically. */
POCL_EXPORT
void pocl_update_event_complete_msg (const char *func, unsigned line,
                                     cl_event event, const char *msg);
208 
/* Call-site wrappers around pocl_update_event_complete_msg (): they supply
 * __func__ and __LINE__ of the place where the macro is used.  The _MSG
 * form forwards a message, the plain form passes NULL.
 * NOTE: both expansions already end in ';'. */
#define POCL_UPDATE_EVENT_COMPLETE_MSG(event_, msg_)                          \
  pocl_update_event_complete_msg (__func__, __LINE__, (event_), (msg_));

#define POCL_UPDATE_EVENT_COMPLETE(event_)                                    \
  pocl_update_event_complete_msg (__func__, __LINE__, (event_), NULL);
214 
/* Counterpart of the completion update for events.
 * NOTE(review): going by the name, marks `event` as failed. */
POCL_EXPORT
void pocl_update_event_failed (cl_event event);

/* String forms of a status value / a command type.
 * NOTE(review): the returned strings are presumably static and must not
 * be freed -- ownership is not visible here. */
const char*
pocl_status_to_str (int status);

const char *
pocl_command_to_str (cl_command_type cmd);

/* Runs an external command described by the argument vector `args`.
 * NOTE(review): args is presumably NULL-terminated with args[0] being the
 * program, and the return value its exit status -- confirm. */
int
pocl_run_command(char * const *args);

/* Conversions between float and a 16-bit half value stored in uint16_t. */
uint16_t float_to_half (float value);

float half_to_float (uint16_t value);

/* Inspects `size` bytes at `bitcode`.
 * NOTE(review): going by the name, returns non-zero when the data is a
 * SPIR-V kernel module -- confirm. */
int bitcode_is_spirv_kernel (const char *bitcode, size_t size);
232 
233 #ifdef __cplusplus
234 }
235 #endif
236 
/* Common macros for finishing "*GetInfo" API call implementations.
 * All the *GetInfo functions have been specified to look alike,
 * and have been implemented to use the same variable names, so this
 * code can be shared.  The macros expand in the caller's scope and use
 * its `param_value`, `param_value_size` and `param_value_size_ret`.
 */

/* POCL_RETURN_GETINFO_INNER (size, assign):
 *  - if param_value is non-NULL: hands the "buffer too small" condition
 *    (param_value_size < size) with CL_INVALID_VALUE to
 *    POCL_RETURN_ERROR_ON, then executes `assign`, which is expected to
 *    store the result into param_value;
 *  - if param_value_size_ret is non-NULL: stores `size` through it;
 *  - returns CL_SUCCESS from the enclosing function.
 * `size` is evaluated exactly once and converted to size_t, so any
 * expression may be passed and the "%zu" arguments always have the
 * matching type. */
#define POCL_RETURN_GETINFO_INNER(__SIZE__, MEMASSIGN)                        \
  do                                                                          \
    {                                                                         \
      size_t const pocl_getinfo_size_ = (size_t) (__SIZE__);                  \
      if (param_value)                                                        \
        {                                                                     \
          POCL_RETURN_ERROR_ON (                                              \
              (param_value_size < pocl_getinfo_size_), CL_INVALID_VALUE,      \
              "param_value_size (%zu) smaller than actual size (%zu)\n",      \
              (size_t) param_value_size, pocl_getinfo_size_);                 \
          MEMASSIGN;                                                          \
        }                                                                     \
      if (param_value_size_ret)                                               \
        *param_value_size_ret = pocl_getinfo_size_;                           \
      return CL_SUCCESS;                                                      \
    }                                                                         \
  while (0)
259 
/* Returns __SIZE__ bytes starting at __POINTER__ as the query result (see
 * POCL_RETURN_GETINFO_INNER).  Both arguments are parenthesized before
 * being forwarded, so compound expressions such as `cond ? a : b` are
 * safe.  Note that __SIZE__ appears twice in the expansion; avoid
 * arguments with side effects. */
#define POCL_RETURN_GETINFO_SIZE(__SIZE__, __POINTER__)                 \
  POCL_RETURN_GETINFO_INNER ((__SIZE__),                                \
    memcpy (param_value, (__POINTER__), (__SIZE__)))
263 
/* Returns the NUL-terminated string __STR__ (terminator included in the
 * reported size) as the query result, see POCL_RETURN_GETINFO_INNER.
 * __STR__ is evaluated exactly once, so passing a function call or an
 * expression with side effects is safe.  __STR__ must not be NULL: it is
 * handed to strlen (). */
#define POCL_RETURN_GETINFO_STR(__STR__)                                \
  do                                                                    \
    {                                                                   \
      const char *const pocl_getinfo_str_ = (__STR__);                  \
      size_t const value_size = strlen (pocl_getinfo_str_) + 1;         \
      POCL_RETURN_GETINFO_INNER (value_size,                            \
                  memcpy (param_value, pocl_getinfo_str_, value_size)); \
    }                                                                   \
  while (0)
272 
/* Like POCL_RETURN_GETINFO_STR, but additionally releases __STR__ with
 * POCL_MEM_FREE on every path, including the error path.
 *
 *  - param_value set and large enough: the string (with terminator) is
 *    copied, then freed;
 *  - param_value set but too small: the string is freed and
 *    CL_INVALID_VALUE is returned (param_value_size_ret is not written);
 *  - param_value not set: the string is just freed.
 * On the successful paths the size is stored through
 * param_value_size_ret when that is non-NULL and CL_SUCCESS is returned.
 *
 * __STR__ is expanded several times and handed to POCL_MEM_FREE, so pass
 * a plain pointer variable. */
#define POCL_RETURN_GETINFO_STR_FREE(__STR__)                                 \
  do                                                                          \
    {                                                                         \
      size_t const value_size = strlen (__STR__) + 1;                         \
      int const too_small                                                     \
          = param_value && (param_value_size < value_size);                   \
      if (param_value && !too_small)                                          \
        memcpy (param_value, __STR__, value_size);                            \
      POCL_MEM_FREE (__STR__);                                                \
      if (too_small)                                                          \
        return CL_INVALID_VALUE;                                              \
      if (param_value_size_ret)                                               \
        *param_value_size_ret = value_size;                                   \
      return CL_SUCCESS;                                                      \
    }                                                                         \
  while (0)
292 
/* Returns a single value of type __TYPE__ as the query result: the
 * reported size is sizeof (__TYPE__) and, when the caller supplied a
 * buffer, __VALUE__ is stored into it through a __TYPE__ pointer (see
 * POCL_RETURN_GETINFO_INNER for the size check and the return). */
#define POCL_RETURN_GETINFO(__TYPE__, __VALUE__)                        \
  do                                                                    \
    {                                                                   \
      size_t const value_size = sizeof (__TYPE__);                      \
      POCL_RETURN_GETINFO_INNER (                                       \
          value_size, *((__TYPE__ *)param_value) = (__VALUE__));        \
    }                                                                   \
  while (0)
301 
/* Returns an array of __NUM__ elements of type __TYPE__, starting at
 * __VALUE__, as the query result (see POCL_RETURN_GETINFO_INNER).
 * __NUM__ is parenthesized before the multiplication, so a compound count
 * such as `a + b` yields (a + b) * sizeof (__TYPE__) rather than
 * a + b * sizeof (__TYPE__). */
#define POCL_RETURN_GETINFO_ARRAY(__TYPE__, __NUM__, __VALUE__)         \
  do                                                                    \
    {                                                                   \
      size_t const value_size = (size_t) (__NUM__) * sizeof (__TYPE__); \
      POCL_RETURN_GETINFO_INNER (value_size,                            \
                  memcpy (param_value, (__VALUE__), value_size));       \
    }                                                                   \
  while (0)
310 
/* If `mem` is an image of type CL_MEM_OBJECT_IMAGE1D_BUFFER, replaces it
 * (in place) with mem->buffer; otherwise leaves it unchanged.
 * `mem` must be a modifiable, non-NULL lvalue and is expanded several
 * times, so it must not have side effects.  Every use is parenthesized,
 * so expressions such as `*pmem` work.
 * NOTE: the expansion already ends in ';' (kept for existing callers). */
#define IMAGE1D_TO_BUFFER(mem)                                                \
  (mem) = (((mem)->is_image && ((mem)->type == CL_MEM_OBJECT_IMAGE1D_BUFFER)) \
               ? (mem)->buffer                                                \
               : (mem));
315 
/* Non-zero when `mem` is non-NULL, is an image and has type
 * CL_MEM_OBJECT_IMAGE1D_BUFFER.  `mem` is expanded up to three times
 * (avoid side effects); every use is parenthesized so arbitrary pointer
 * expressions can be passed. */
#define IS_IMAGE1D_BUFFER(mem)                                                \
  ((mem) && (mem)->is_image && ((mem)->type == CL_MEM_OBJECT_IMAGE1D_BUFFER))
318 
/* Declares three variables in the caller's scope:
 *   px          = mem->image_elem_size * mem->image_channels
 *   i1d_origin  = { o[0] * px, o[1], o[2] }
 *   i1d_region  = { r[0] * px, r[1], r[2] }
 * i.e. only the first component of origin/region is scaled by px.
 * Because it expands to declarations it cannot be wrapped in
 * do { } while (0); use it at most once per scope and not as the body of
 * an unbraced if/else.  All arguments are parenthesized, so pointer
 * expressions such as `*pmem` or `base + 3` are safe. */
#define IMAGE1D_ORIG_REG_TO_BYTES(mem, o, r)                                  \
  size_t px = ((mem)->image_elem_size * (mem)->image_channels);               \
  size_t i1d_origin[3] = { (o)[0] * px, (o)[1], (o)[2] };                     \
  size_t i1d_region[3] = { (r)[0] * px, (r)[1], (r)[2] };
323 
324 #endif
325