1 /* OpenCL runtime library: pocl_util utility functions
2
3 Copyright (c) 2012 Pekka Jääskeläinen / Tampere University of Technology
4
5 Permission is hereby granted, free of charge, to any person obtaining a copy
6 of this software and associated documentation files (the "Software"), to deal
7 in the Software without restriction, including without limitation the rights
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 copies of the Software, and to permit persons to whom the Software is
10 furnished to do so, subject to the following conditions:
11
12 The above copyright notice and this permission notice shall be included in
13 all copies or substantial portions of the Software.
14
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 THE SOFTWARE.
22 */
23
24 #ifndef POCL_UTIL_H
25 #define POCL_UTIL_H
26
27 #include <stdint.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include "pocl_cl.h"
31
32 #ifdef __cplusplus
33 extern "C" {
34 #endif
35
36 uint32_t byteswap_uint32_t (uint32_t word, char should_swap);
37 float byteswap_float (float word, char should_swap);
38
39 /* set rounding mode */
40 POCL_EXPORT
41 void pocl_restore_rm (unsigned rm);
42 /* get current rounding mode */
43 POCL_EXPORT
44 unsigned pocl_save_rm ();
45 /* set OpenCL's default (round to nearest) rounding mode */
46 POCL_EXPORT
47 void pocl_set_default_rm ();
48
49
50 /* sets the flush-denorms-to-zero flag on the CPU, if supported */
51 POCL_EXPORT
52 void pocl_set_ftz (unsigned ftz);
/* save / restore the CPU's flush-to-zero flag */
54 POCL_EXPORT
55 unsigned pocl_save_ftz (void);
56 POCL_EXPORT
57 void pocl_restore_ftz (unsigned ftz);
58
/* Signal handler installers (SIGFPE and SIGUSR2, going by their names).
 * Both take no arguments; they are declared with (void) so that the
 * declarations are real prototypes in C and stray arguments are rejected
 * at compile time. */
void pocl_install_sigfpe_handler (void);
void pocl_install_sigusr2_handler (void);
61
62 /* Finds the next highest power of two of the given value. */
63 size_t pocl_size_ceil2 (size_t x);
64 uint64_t pocl_size_ceil2_64 (uint64_t x);
65
66 /* Allocates aligned blocks of memory.
67 *
68 * Uses posix_memalign when available. Otherwise, uses
69 * malloc to allocate a block of memory on the heap of the desired
70 * size which is then aligned with the given alignment. The resulting
71 * pointer must be freed with a call to pocl_aligned_free. Alignment
72 * must be a non-zero power of 2.
73 */
74
75 POCL_EXPORT
76 void *pocl_aligned_malloc(size_t alignment, size_t size);
77 #define pocl_aligned_free(x) POCL_MEM_FREE(x)
78
79 /* locks / unlocks two events in order of their event-id.
80 * This avoids any potential deadlocks of threads should
81 * they try to lock events in opposite order. */
82 void pocl_lock_events_inorder (cl_event ev1, cl_event ev2);
83 void pocl_unlock_events_inorder (cl_event ev1, cl_event ev2);
84
85 /* Function for creating events */
86 cl_int pocl_create_event (cl_event *event, cl_command_queue command_queue,
87 cl_command_type command_type, size_t num_buffers,
88 const cl_mem* buffers, cl_context context);
89
90 cl_int pocl_create_command (_cl_command_node **cmd,
91 cl_command_queue command_queue,
92 cl_command_type command_type, cl_event *event,
93 cl_uint num_events, const cl_event *wait_list,
94 size_t num_buffers, cl_mem *buffers,
95 char *readonly_flags);
96
97 cl_int pocl_create_command_migrate (_cl_command_node **cmd,
98 cl_command_queue command_queue,
99 cl_mem_migration_flags flags,
100 cl_event *event_p,
101 cl_uint num_events,
102 const cl_event *wait_list,
103 size_t num_buffers,
104 cl_mem *buffers,
105 char *readonly_flags);
106
107 void pocl_command_enqueue (cl_command_queue command_queue,
108 _cl_command_node *node);
109
110 int pocl_alloc_or_retain_mem_host_ptr (cl_mem mem);
111
112 int pocl_release_mem_host_ptr (cl_mem mem);
113
114 /* does several sanity checks on buffer & given memory region */
115 int pocl_buffer_boundcheck(cl_mem buffer, size_t offset, size_t size);
/* same as above, but checks two buffers (source and destination) */
117 int pocl_buffers_boundcheck(cl_mem src_buffer, cl_mem dst_buffer,
118 size_t src_offset, size_t dst_offset, size_t size);
119 /* checks for overlapping regions in buffers, including overlapping subbuffers */
120 int pocl_buffers_overlap(cl_mem src_buffer, cl_mem dst_buffer,
121 size_t src_offset, size_t dst_offset, size_t size);
122
123 int pocl_buffer_boundcheck_3d(const size_t buffer_size, const size_t *origin,
124 const size_t *region, size_t *row_pitch,
125 size_t *slice_pitch, const char* prefix);
126
127 int
128 check_copy_overlap(const size_t src_offset[3],
129 const size_t dst_offset[3],
130 const size_t region[3],
131 const size_t row_pitch, const size_t slice_pitch);
132
133 /**
134 * Push a command into ready list if all previous events are completed or
135 * in pending_list if the command still has pending dependencies
136 */
137 POCL_EXPORT
138 void
139 pocl_command_push (_cl_command_node *node,
140 _cl_command_node **ready_list,
141 _cl_command_node **pending_list);
142
143 void pocl_unmap_command_finished (cl_device_id dev,
144 pocl_mem_identifier *mem_id, cl_mem mem,
145 mem_mapping_t *map);
146
147 void pocl_unmap_command_finished2 (cl_event event, _cl_command_t *cmd);
148
149 /**
150 * Return true if a command is ready to execute (no more event in wait list
151 * or false if not
152 */
153 static inline int
pocl_command_is_ready(cl_event event)154 pocl_command_is_ready(cl_event event)
155 {
156 return event->wait_list == NULL;
157 }
158
159 typedef void (*empty_queue_callback) (cl_command_queue cq);
160
161 void pocl_cl_mem_inherit_flags (cl_mem mem, cl_mem from_buffer,
162 cl_mem_flags flags);
163
164 void pocl_setup_context(cl_context context);
165
166 /* Helpers for dealing with devices / subdevices */
167
168 cl_device_id pocl_real_dev (const cl_device_id);
169 cl_device_id * pocl_unique_device_list(const cl_device_id * in, cl_uint num, cl_uint *real);
170 int pocl_device_supports_builtin_kernel (cl_device_id dev,
171 const char *kernel_name);
172
/* Resolves the command queue's device through pocl_real_dev () and asserts
 * that it is one of the devices of the queue's context.
 *
 * Relies on three names from the invoking scope: `command_queue`, an
 * assignable `device` and an integer index `i`.  After the macro, `device`
 * holds the resolved device and `i` the index of its first match in
 * context->devices. */
#define POCL_CHECK_DEV_IN_CMDQ                                                \
  do                                                                          \
    {                                                                         \
      device = pocl_real_dev (command_queue->device);                         \
      i = 0;                                                                  \
      while (i < command_queue->context->num_devices                          \
             && command_queue->context->devices[i] != device)                 \
        ++i;                                                                  \
      assert (i < command_queue->context->num_devices);                       \
    }                                                                         \
  while (0)
185
186 int pocl_check_event_wait_list(cl_command_queue command_queue,
187 cl_uint num_events_in_wait_list,
188 const cl_event * event_wait_list);
189
/* Receives the return code of a pthread call (STATUS) together with the
 * call site (LINE, FUNC).  Judging by its name it aborts on a non-zero
 * status; the implementation lives outside this header. */
void pocl_abort_on_pthread_error (int status, unsigned line, const char *func);

/* Wraps a pthread call: forwards its return code plus the current line and
 * function name to pocl_abort_on_pthread_error ().
 *
 * Uses the standard C99 __func__ (like the other macros in this header)
 * instead of the GNU-specific __FUNCTION__.
 *
 * NOTE: the expansion already ends in ';' (kept for backward
 * compatibility), so do not use it as the sole body of an unbraced
 * if/else. */
#define PTHREAD_CHECK(code)                                                   \
  pocl_abort_on_pthread_error ((code), __LINE__, __func__);
194
195 void pocl_update_event_queued (cl_event event);
196
197 POCL_EXPORT
198 void pocl_update_event_submitted (cl_event event);
199
200 void pocl_update_event_running_unlocked (cl_event event);
201
202 POCL_EXPORT
203 void pocl_update_event_running (cl_event event);
204
205 POCL_EXPORT
206 void pocl_update_event_complete_msg (const char *func, unsigned line,
207 cl_event event, const char *msg);
208
/* Calls pocl_update_event_complete_msg () for EV, supplying the name of the
 * calling function and the current line; MESSAGE is passed through as the
 * last argument.  The expansion carries its own trailing ';'. */
#define POCL_UPDATE_EVENT_COMPLETE_MSG(ev, message)                           \
  pocl_update_event_complete_msg (__func__, __LINE__, (ev), message);

/* Same as POCL_UPDATE_EVENT_COMPLETE_MSG, with NULL as the message. */
#define POCL_UPDATE_EVENT_COMPLETE(ev)                                        \
  POCL_UPDATE_EVENT_COMPLETE_MSG (ev, NULL)
214
215 POCL_EXPORT
216 void pocl_update_event_failed (cl_event event);
217
218 const char*
219 pocl_status_to_str (int status);
220
221 const char *
222 pocl_command_to_str (cl_command_type cmd);
223
224 int
225 pocl_run_command(char * const *args);
226
227 uint16_t float_to_half (float value);
228
229 float half_to_float (uint16_t value);
230
231 int bitcode_is_spirv_kernel (const char *bitcode, size_t size);
232
233 #ifdef __cplusplus
234 }
235 #endif
236
237 /* Common macro for cleaning up "*GetInfo" API call implementations.
238 * All the *GetInfo functions have been specified to look alike,
239 * and have been implemented to use the same variable names, so this
240 * code can be shared.
241 */
242
/* Shared tail of the "*GetInfo" implementations.
 *
 * Expects `param_value`, `param_value_size` and `param_value_size_ret` to be
 * in scope at the point of use and returns from the enclosing function.
 *
 *   __SIZE__   number of bytes the queried value occupies
 *   MEMASSIGN  statement that stores the value into param_value; it is only
 *              executed when param_value is non-NULL and large enough
 *
 * If param_value is non-NULL but param_value_size is smaller than __SIZE__,
 * POCL_RETURN_ERROR_ON is invoked with CL_INVALID_VALUE.  Otherwise the size
 * is reported through param_value_size_ret (when non-NULL) and CL_SUCCESS is
 * returned.
 *
 * __SIZE__ is evaluated exactly once into a size_t local: this protects
 * against operator-precedence surprises (e.g. a conditional expression in
 * the comparison) and guarantees the type matches the %zu format. */
#define POCL_RETURN_GETINFO_INNER(__SIZE__, MEMASSIGN)                        \
  do                                                                          \
    {                                                                         \
      size_t const pocl_getinfo_size_ = (__SIZE__);                           \
      if (param_value)                                                        \
        {                                                                     \
          POCL_RETURN_ERROR_ON (                                              \
              (param_value_size < pocl_getinfo_size_), CL_INVALID_VALUE,      \
              "param_value_size (%zu) smaller than actual size (%zu)\n",      \
              param_value_size, pocl_getinfo_size_);                          \
          MEMASSIGN;                                                          \
        }                                                                     \
      if (param_value_size_ret)                                               \
        *param_value_size_ret = pocl_getinfo_size_;                           \
      return CL_SUCCESS;                                                      \
    }                                                                         \
  while (0)
259
/* GetInfo helper for raw byte ranges: reports __SIZE__ bytes and, when the
 * caller supplied a large enough param_value, copies them from __POINTER__
 * with memcpy.  Both arguments are parenthesized before being forwarded so
 * that compound expressions keep their meaning inside
 * POCL_RETURN_GETINFO_INNER.  __SIZE__ may be evaluated more than once, so
 * it must not have side effects. */
#define POCL_RETURN_GETINFO_SIZE(__SIZE__, __POINTER__)                       \
  POCL_RETURN_GETINFO_INNER ((__SIZE__),                                      \
                             memcpy (param_value, (__POINTER__), (__SIZE__)))
263
/* GetInfo helper for NUL-terminated strings: the reported size is
 * strlen (__STR__) + 1, i.e. it includes the terminator, and the string is
 * copied into param_value when the caller provided enough room.
 *
 * __STR__ is evaluated exactly once (it used to be evaluated separately for
 * strlen and for memcpy), so arguments with side effects or expensive
 * calls behave as expected. */
#define POCL_RETURN_GETINFO_STR(__STR__)                                      \
  do                                                                          \
    {                                                                         \
      const char *const pocl_getinfo_str_ = (__STR__);                        \
      size_t const value_size = strlen (pocl_getinfo_str_) + 1;               \
      POCL_RETURN_GETINFO_INNER (                                             \
          value_size, memcpy (param_value, pocl_getinfo_str_, value_size));   \
    }                                                                         \
  while (0)
272
/* Like POCL_RETURN_GETINFO_STR, but always releases __STR__ with
 * POCL_MEM_FREE before returning from the enclosing function.
 *
 * - param_value given and large enough: the string (including its
 *   terminator) is copied, then freed.
 * - param_value given but too small: the string is freed and
 *   CL_INVALID_VALUE is returned without touching param_value_size_ret.
 * - param_value NULL: the string is freed and only the size is reported.
 *
 * __STR__ is expanded several times and handed to POCL_MEM_FREE. */
#define POCL_RETURN_GETINFO_STR_FREE(__STR__)                                 \
  do                                                                          \
    {                                                                         \
      size_t const value_size = strlen (__STR__) + 1;                         \
      int const pocl_has_dest_ = param_value ? 1 : 0;                         \
      int const pocl_fits_                                                    \
          = pocl_has_dest_ && !(param_value_size < value_size);               \
      if (pocl_fits_)                                                         \
        memcpy (param_value, __STR__, value_size);                            \
      POCL_MEM_FREE (__STR__);                                                \
      if (pocl_has_dest_ && !pocl_fits_)                                      \
        return CL_INVALID_VALUE;                                              \
      if (param_value_size_ret)                                               \
        *param_value_size_ret = value_size;                                   \
      return CL_SUCCESS;                                                      \
    }                                                                         \
  while (0)
292
/* GetInfo helper for a single value of type __TYPE__: reports
 * sizeof (__TYPE__) and, when param_value is non-NULL and large enough,
 * stores __VALUE__ (converted to __TYPE__) into it.
 *
 * The value is written with memcpy from a typed temporary rather than
 * through a cast of param_value, so a caller-supplied buffer that is not
 * suitably aligned for __TYPE__ does not trigger undefined behavior.
 * As before, __VALUE__ is only evaluated when the store actually happens. */
#define POCL_RETURN_GETINFO(__TYPE__, __VALUE__)                              \
  do                                                                          \
    {                                                                         \
      size_t const value_size = sizeof (__TYPE__);                            \
      POCL_RETURN_GETINFO_INNER (                                             \
          value_size, do {                                                    \
            __TYPE__ pocl_getinfo_tmp_ = (__VALUE__);                         \
            memcpy (param_value, &pocl_getinfo_tmp_, value_size);             \
          } while (0));                                                       \
    }                                                                         \
  while (0)
301
/* GetInfo helper for arrays: reports __NUM__ * sizeof (__TYPE__) bytes and
 * copies that many bytes from __VALUE__ when param_value is non-NULL and
 * large enough.
 *
 * __NUM__ is parenthesized so that an element count written as an
 * expression (e.g. `a + b`) is multiplied as a whole instead of only its
 * last operand. */
#define POCL_RETURN_GETINFO_ARRAY(__TYPE__, __NUM__, __VALUE__)               \
  do                                                                          \
    {                                                                         \
      size_t const value_size = (__NUM__) * sizeof (__TYPE__);                \
      POCL_RETURN_GETINFO_INNER (                                             \
          value_size, memcpy (param_value, (__VALUE__), value_size));         \
    }                                                                         \
  while (0)
310
/* If MEM is an image of type CL_MEM_OBJECT_IMAGE1D_BUFFER, replaces MEM with
 * the object stored in its `buffer` member; otherwise leaves MEM unchanged.
 *
 * MEM must be an assignable expression and is expanded several times, so it
 * must not have side effects.  Every use is parenthesized so that
 * arguments such as `*ptr` bind correctly.  The expansion is a complete
 * statement including its trailing ';' (kept for backward compatibility). */
#define IMAGE1D_TO_BUFFER(mem)                                                \
  (mem) = (((mem)->is_image                                                   \
            && ((mem)->type == CL_MEM_OBJECT_IMAGE1D_BUFFER))                 \
               ? (mem)->buffer                                                \
               : (mem));
315
/* Evaluates to non-zero when MEM is non-NULL, is flagged as an image and
 * has type CL_MEM_OBJECT_IMAGE1D_BUFFER.  MEM is expanded several times
 * (no side effects, please) and is parenthesized so that expressions such
 * as `*ptr` work as arguments. */
#define IS_IMAGE1D_BUFFER(mem)                                                \
  ((mem) && (mem)->is_image                                                   \
   && ((mem)->type == CL_MEM_OBJECT_IMAGE1D_BUFFER))
318
/* Declares three locals in the invoking scope:
 *   px          image_elem_size * image_channels of MEM
 *   i1d_origin  copy of O with element 0 multiplied by px
 *   i1d_region  copy of R with element 0 multiplied by px
 *
 * Because it expands to declarations it is intentionally not wrapped in a
 * block, and can be used only once per scope.  All arguments are
 * parenthesized so that expressions such as `*img` or `origin + 1` are
 * accepted. */
#define IMAGE1D_ORIG_REG_TO_BYTES(mem, o, r)                                  \
  size_t px = ((mem)->image_elem_size * (mem)->image_channels);               \
  size_t i1d_origin[3] = { (o)[0] * px, (o)[1], (o)[2] };                     \
  size_t i1d_region[3] = { (r)[0] * px, (r)[1], (r)[2] };
323
324 #endif
325