1 #include "config.h"
2
3 #include <assert.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <unistd.h>
7
8 #include "pocl_cl.h"
9 #include "utlist.h"
10
11 // for pocl_aligned_malloc
12 #include "pocl_util.h"
13
14 #ifdef ENABLE_LLVM
15 #include "pocl_llvm.h"
16 #endif
17
18 #include "common_driver.h"
19
/* Appends a printf-style message to program->build_log[device_i] and then
 * hands `err` and the same message to POCL_RETURN_ERROR_ON with an
 * always-true condition (presumably logging it and returning `err` from the
 * enclosing function -- see that macro's definition).
 *
 * Requirements at the expansion site: `program` and `device_i` must be in
 * scope, and the enclosing function must be able to return `err`.
 *
 * The message is formatted into a 1024-byte scratch buffer, so longer
 * messages are truncated in the build log. A NULL build log is treated as
 * an empty one. If the log cannot be grown, it is left unchanged and the
 * error is still reported. */
#define APPEND_TO_BUILD_LOG_RET(err, ...)                                     \
  do                                                                          \
    {                                                                         \
      char temp[1024];                                                        \
      int written = snprintf (temp, sizeof (temp), __VA_ARGS__);              \
      if (written > 0)                                                        \
        {                                                                     \
          /* snprintf reports the untruncated length; only the bytes that */  \
          /* actually fit into temp may be copied. */                         \
          size_t add = (size_t)written;                                       \
          if (add >= sizeof (temp))                                           \
            add = sizeof (temp) - 1;                                          \
          char *oldp = program->build_log[device_i];                          \
          size_t l = (oldp != NULL) ? strlen (oldp) : 0;                      \
          /* +1 leaves room for the terminating NUL. */                       \
          char *newp = realloc (oldp, l + add + 1);                           \
          if (newp != NULL)                                                   \
            {                                                                 \
              memcpy (newp + l, temp, add);                                   \
              newp[l + add] = 0;                                              \
              program->build_log[device_i] = newp;                            \
            }                                                                 \
        }                                                                     \
      POCL_RETURN_ERROR_ON (1, err, __VA_ARGS__);                             \
    }                                                                         \
  while (0)
38
39 void
pocl_driver_read(void * data,void * __restrict__ host_ptr,pocl_mem_identifier * src_mem_id,cl_mem src_buf,size_t offset,size_t size)40 pocl_driver_read (void *data, void *__restrict__ host_ptr,
41 pocl_mem_identifier *src_mem_id, cl_mem src_buf,
42 size_t offset, size_t size)
43 {
44 void *__restrict__ device_ptr = src_mem_id->mem_ptr;
45 if (host_ptr == device_ptr)
46 return;
47
48 memcpy (host_ptr, (char *)device_ptr + offset, size);
49 }
50
51 void
pocl_driver_write(void * data,const void * __restrict__ host_ptr,pocl_mem_identifier * dst_mem_id,cl_mem dst_buf,size_t offset,size_t size)52 pocl_driver_write (void *data, const void *__restrict__ host_ptr,
53 pocl_mem_identifier *dst_mem_id, cl_mem dst_buf,
54 size_t offset, size_t size)
55 {
56 void *__restrict__ device_ptr = dst_mem_id->mem_ptr;
57 if (host_ptr == device_ptr)
58 return;
59
60 memcpy ((char *)device_ptr + offset, host_ptr, size);
61 }
62
63 void
pocl_driver_copy(void * data,pocl_mem_identifier * dst_mem_id,cl_mem dst_buf,pocl_mem_identifier * src_mem_id,cl_mem src_buf,size_t dst_offset,size_t src_offset,size_t size)64 pocl_driver_copy (void *data, pocl_mem_identifier *dst_mem_id, cl_mem dst_buf,
65 pocl_mem_identifier *src_mem_id, cl_mem src_buf,
66 size_t dst_offset, size_t src_offset, size_t size)
67 {
68 char *__restrict__ src_ptr = (char *)src_mem_id->mem_ptr;
69 char *__restrict__ dst_ptr = (char *)dst_mem_id->mem_ptr;
70 if ((src_ptr + src_offset) == (dst_ptr + dst_offset))
71 return;
72
73 memcpy (dst_ptr + dst_offset, src_ptr + src_offset, size);
74 }
75
76 void
pocl_driver_copy_with_size(void * data,pocl_mem_identifier * dst_mem_id,cl_mem dst_buf,pocl_mem_identifier * src_mem_id,cl_mem src_buf,pocl_mem_identifier * content_size_buf_mem_id,cl_mem content_size_buf,size_t dst_offset,size_t src_offset,size_t size)77 pocl_driver_copy_with_size (void *data, pocl_mem_identifier *dst_mem_id,
78 cl_mem dst_buf, pocl_mem_identifier *src_mem_id,
79 cl_mem src_buf,
80 pocl_mem_identifier *content_size_buf_mem_id,
81 cl_mem content_size_buf, size_t dst_offset,
82 size_t src_offset, size_t size)
83 {
84 char *__restrict__ src_ptr = (char *)src_mem_id->mem_ptr;
85 char *__restrict__ dst_ptr = (char *)dst_mem_id->mem_ptr;
86 if ((src_ptr + src_offset) == (dst_ptr + dst_offset))
87 return;
88
89 uint64_t *content_size = (uint64_t *)content_size_buf_mem_id->mem_ptr;
90 if (*content_size < (src_offset + size))
91 {
92 if (*content_size > src_offset)
93 {
94 size_t real_bytes = *content_size - src_offset;
95 size_t to_copy = real_bytes < size ? real_bytes : size;
96 memcpy (dst_ptr + dst_offset, src_ptr + src_offset, to_copy);
97 }
98 }
99 else
100 memcpy (dst_ptr + dst_offset, src_ptr + src_offset, size);
101 }
102
103 void
pocl_driver_copy_rect(void * data,pocl_mem_identifier * dst_mem_id,cl_mem dst_buf,pocl_mem_identifier * src_mem_id,cl_mem src_buf,const size_t * __restrict__ const dst_origin,const size_t * __restrict__ const src_origin,const size_t * __restrict__ const region,size_t const dst_row_pitch,size_t const dst_slice_pitch,size_t const src_row_pitch,size_t const src_slice_pitch)104 pocl_driver_copy_rect (void *data, pocl_mem_identifier *dst_mem_id,
105 cl_mem dst_buf, pocl_mem_identifier *src_mem_id,
106 cl_mem src_buf,
107 const size_t *__restrict__ const dst_origin,
108 const size_t *__restrict__ const src_origin,
109 const size_t *__restrict__ const region,
110 size_t const dst_row_pitch,
111 size_t const dst_slice_pitch,
112 size_t const src_row_pitch,
113 size_t const src_slice_pitch)
114 {
115
116 void *__restrict__ src_ptr = src_mem_id->mem_ptr;
117 void *__restrict__ dst_ptr = dst_mem_id->mem_ptr;
118 char const *__restrict const adjusted_src_ptr
119 = (char const *)src_ptr + src_origin[0] + src_row_pitch * src_origin[1]
120 + src_slice_pitch * src_origin[2];
121 char *__restrict__ const adjusted_dst_ptr
122 = (char *)dst_ptr + dst_origin[0] + dst_row_pitch * dst_origin[1]
123 + dst_slice_pitch * dst_origin[2];
124
125 POCL_MSG_PRINT_MEMORY (
126 "COPY RECT \n"
127 "SRC %p DST %p SIZE %zu\n"
128 "src origin %u %u %u dst origin %u %u %u \n"
129 "src_row_pitch %lu src_slice pitch %lu\n"
130 "dst_row_pitch %lu dst_slice_pitch %lu\n"
131 "reg[0] %lu reg[1] %lu reg[2] %lu\n",
132 adjusted_src_ptr, adjusted_dst_ptr, region[0] * region[1] * region[2],
133 (unsigned)src_origin[0], (unsigned)src_origin[1],
134 (unsigned)src_origin[2], (unsigned)dst_origin[0],
135 (unsigned)dst_origin[1], (unsigned)dst_origin[2],
136 (unsigned long)src_row_pitch, (unsigned long)src_slice_pitch,
137 (unsigned long)dst_row_pitch, (unsigned long)dst_slice_pitch,
138 (unsigned long)region[0], (unsigned long)region[1],
139 (unsigned long)region[2]);
140
141 size_t j, k;
142
143 /* TODO: handle overlaping regions */
144 if ((src_row_pitch == dst_row_pitch && dst_row_pitch == region[0])
145 && (src_slice_pitch == dst_slice_pitch
146 && dst_slice_pitch == (region[1] * region[0])))
147 {
148 memcpy (adjusted_dst_ptr, adjusted_src_ptr,
149 region[2] * region[1] * region[0]);
150 }
151 else
152 {
153 for (k = 0; k < region[2]; ++k)
154 for (j = 0; j < region[1]; ++j)
155 memcpy (adjusted_dst_ptr + dst_row_pitch * j + dst_slice_pitch * k,
156 adjusted_src_ptr + src_row_pitch * j + src_slice_pitch * k,
157 region[0]);
158 }
159 }
160
161 void
pocl_driver_write_rect(void * data,const void * __restrict__ const host_ptr,pocl_mem_identifier * dst_mem_id,cl_mem dst_buf,const size_t * __restrict__ const buffer_origin,const size_t * __restrict__ const host_origin,const size_t * __restrict__ const region,size_t const buffer_row_pitch,size_t const buffer_slice_pitch,size_t const host_row_pitch,size_t const host_slice_pitch)162 pocl_driver_write_rect (void *data, const void *__restrict__ const host_ptr,
163 pocl_mem_identifier *dst_mem_id, cl_mem dst_buf,
164 const size_t *__restrict__ const buffer_origin,
165 const size_t *__restrict__ const host_origin,
166 const size_t *__restrict__ const region,
167 size_t const buffer_row_pitch,
168 size_t const buffer_slice_pitch,
169 size_t const host_row_pitch,
170 size_t const host_slice_pitch)
171 {
172 void *__restrict__ device_ptr = dst_mem_id->mem_ptr;
173
174 char *__restrict const adjusted_device_ptr
175 = (char *)device_ptr + buffer_origin[0]
176 + buffer_row_pitch * buffer_origin[1]
177 + buffer_slice_pitch * buffer_origin[2];
178 char const *__restrict__ const adjusted_host_ptr
179 = (char const *)host_ptr + host_origin[0]
180 + host_row_pitch * host_origin[1] + host_slice_pitch * host_origin[2];
181
182 POCL_MSG_PRINT_MEMORY (
183 "WRITE RECT \n"
184 "SRC HOST %p DST DEV %p SIZE %zu\n"
185 "borigin %u %u %u horigin %u %u %u \n"
186 "row_pitch %lu slice pitch \n"
187 "%lu host_row_pitch %lu host_slice_pitch %lu\n"
188 "reg[0] %lu reg[1] %lu reg[2] %lu\n",
189 adjusted_host_ptr, adjusted_device_ptr,
190 region[0] * region[1] * region[2], (unsigned)buffer_origin[0],
191 (unsigned)buffer_origin[1], (unsigned)buffer_origin[2],
192 (unsigned)host_origin[0], (unsigned)host_origin[1],
193 (unsigned)host_origin[2], (unsigned long)buffer_row_pitch,
194 (unsigned long)buffer_slice_pitch, (unsigned long)host_row_pitch,
195 (unsigned long)host_slice_pitch, (unsigned long)region[0],
196 (unsigned long)region[1], (unsigned long)region[2]);
197
198 size_t j, k;
199
200 /* TODO: handle overlaping regions */
201 if ((buffer_row_pitch == host_row_pitch && host_row_pitch == region[0])
202 && (buffer_slice_pitch == host_slice_pitch
203 && host_slice_pitch == (region[1] * region[0])))
204 {
205 memcpy (adjusted_device_ptr, adjusted_host_ptr,
206 region[2] * region[1] * region[0]);
207 }
208 else
209 {
210 for (k = 0; k < region[2]; ++k)
211 for (j = 0; j < region[1]; ++j)
212 memcpy (adjusted_device_ptr + buffer_row_pitch * j
213 + buffer_slice_pitch * k,
214 adjusted_host_ptr + host_row_pitch * j
215 + host_slice_pitch * k,
216 region[0]);
217 }
218 }
219
220 void
pocl_driver_read_rect(void * data,void * __restrict__ const host_ptr,pocl_mem_identifier * src_mem_id,cl_mem src_buf,const size_t * __restrict__ const buffer_origin,const size_t * __restrict__ const host_origin,const size_t * __restrict__ const region,size_t const buffer_row_pitch,size_t const buffer_slice_pitch,size_t const host_row_pitch,size_t const host_slice_pitch)221 pocl_driver_read_rect (void *data, void *__restrict__ const host_ptr,
222 pocl_mem_identifier *src_mem_id, cl_mem src_buf,
223 const size_t *__restrict__ const buffer_origin,
224 const size_t *__restrict__ const host_origin,
225 const size_t *__restrict__ const region,
226 size_t const buffer_row_pitch,
227 size_t const buffer_slice_pitch,
228 size_t const host_row_pitch,
229 size_t const host_slice_pitch)
230 {
231 void *__restrict__ device_ptr = src_mem_id->mem_ptr;
232
233 char const *__restrict const adjusted_device_ptr
234 = (char const *)device_ptr + buffer_origin[2] * buffer_slice_pitch
235 + buffer_origin[1] * buffer_row_pitch + buffer_origin[0];
236 char *__restrict__ const adjusted_host_ptr
237 = (char *)host_ptr + host_origin[2] * host_slice_pitch
238 + host_origin[1] * host_row_pitch + host_origin[0];
239
240 POCL_MSG_PRINT_MEMORY (
241 "READ RECT \n"
242 "SRC DEV %p DST HOST %p SIZE %zu\n"
243 "borigin %u %u %u horigin %u %u %u row_pitch %lu slice pitch "
244 "%lu host_row_pitch %lu host_slice_pitch %lu\n"
245 "reg[0] %lu reg[1] %lu reg[2] %lu\n",
246 adjusted_device_ptr, adjusted_host_ptr,
247 region[0] * region[1] * region[2], (unsigned)buffer_origin[0],
248 (unsigned)buffer_origin[1], (unsigned)buffer_origin[2],
249 (unsigned)host_origin[0], (unsigned)host_origin[1],
250 (unsigned)host_origin[2], (unsigned long)buffer_row_pitch,
251 (unsigned long)buffer_slice_pitch, (unsigned long)host_row_pitch,
252 (unsigned long)host_slice_pitch, (unsigned long)region[0],
253 (unsigned long)region[1], (unsigned long)region[2]);
254
255 size_t j, k;
256
257 /* TODO: handle overlaping regions */
258 if ((buffer_row_pitch == host_row_pitch && host_row_pitch == region[0])
259 && (buffer_slice_pitch == host_slice_pitch
260 && host_slice_pitch == (region[1] * region[0])))
261 {
262 memcpy (adjusted_host_ptr, adjusted_device_ptr,
263 region[2] * region[1] * region[0]);
264 }
265 else
266 {
267 for (k = 0; k < region[2]; ++k)
268 for (j = 0; j < region[1]; ++j)
269 memcpy (adjusted_host_ptr + host_row_pitch * j
270 + host_slice_pitch * k,
271 adjusted_device_ptr + buffer_row_pitch * j
272 + buffer_slice_pitch * k,
273 region[0]);
274 }
275 }
276
277 void
pocl_driver_memfill(void * data,pocl_mem_identifier * dst_mem_id,cl_mem dst_buf,size_t size,size_t offset,const void * __restrict__ pattern,size_t pattern_size)278 pocl_driver_memfill (void *data, pocl_mem_identifier *dst_mem_id,
279 cl_mem dst_buf, size_t size, size_t offset,
280 const void *__restrict__ pattern, size_t pattern_size)
281 {
282 void *__restrict__ ptr = dst_mem_id->mem_ptr;
283 size_t i;
284 unsigned j;
285
286 /* memfill size is in bytes, we wanto make it into elements */
287 size /= pattern_size;
288 offset /= pattern_size;
289
290 switch (pattern_size)
291 {
292 case 1:
293 {
294 uint8_t *p = (uint8_t *)ptr + offset;
295 for (i = 0; i < size; i++)
296 p[i] = *(uint8_t *)pattern;
297 }
298 break;
299 case 2:
300 {
301 uint16_t *p = (uint16_t *)ptr + offset;
302 for (i = 0; i < size; i++)
303 p[i] = *(uint16_t *)pattern;
304 }
305 break;
306 case 4:
307 {
308 uint32_t *p = (uint32_t *)ptr + offset;
309 for (i = 0; i < size; i++)
310 p[i] = *(uint32_t *)pattern;
311 }
312 break;
313 case 8:
314 {
315 uint64_t *p = (uint64_t *)ptr + offset;
316 for (i = 0; i < size; i++)
317 p[i] = *(uint64_t *)pattern;
318 }
319 break;
320 case 16:
321 {
322 uint64_t *p = (uint64_t *)ptr + (offset << 1);
323 for (i = 0; i < size; i++)
324 for (j = 0; j < 2; j++)
325 p[(i << 1) + j] = *((uint64_t *)pattern + j);
326 }
327 break;
328 case 32:
329 {
330 uint64_t *p = (uint64_t *)ptr + (offset << 2);
331 for (i = 0; i < size; i++)
332 for (j = 0; j < 4; j++)
333 p[(i << 2) + j] = *((uint64_t *)pattern + j);
334 }
335 break;
336 case 64:
337 {
338 uint64_t *p = (uint64_t *)ptr + (offset << 3);
339 for (i = 0; i < size; i++)
340 for (j = 0; j < 8; j++)
341 p[(i << 3) + j] = *((uint64_t *)pattern + j);
342 }
343 break;
344 case 128:
345 {
346 uint64_t *p = (uint64_t *)ptr + (offset << 4);
347 for (i = 0; i < size; i++)
348 for (j = 0; j < 16; j++)
349 p[(i << 4) + j] = *((uint64_t *)pattern + j);
350 }
351 break;
352 default:
353 assert (0 && "Invalid pattern size");
354 break;
355 }
356 }
357
358 cl_int
pocl_driver_map_mem(void * data,pocl_mem_identifier * src_mem_id,cl_mem src_buf,mem_mapping_t * map)359 pocl_driver_map_mem (void *data, pocl_mem_identifier *src_mem_id,
360 cl_mem src_buf, mem_mapping_t *map)
361 {
362 char *__restrict__ src_device_ptr = (char *)src_mem_id->mem_ptr;
363 assert (map->host_ptr);
364
365 if (map->map_flags & CL_MAP_WRITE_INVALIDATE_REGION)
366 {
367 return CL_SUCCESS;
368 }
369
370 if (map->host_ptr == (src_device_ptr + map->offset))
371 NULL;
372 else
373 memcpy (map->host_ptr, src_device_ptr + map->offset, map->size);
374
375 return CL_SUCCESS;
376 }
377
378 cl_int
pocl_driver_unmap_mem(void * data,pocl_mem_identifier * dst_mem_id,cl_mem dst_buf,mem_mapping_t * map)379 pocl_driver_unmap_mem (void *data, pocl_mem_identifier *dst_mem_id,
380 cl_mem dst_buf, mem_mapping_t *map)
381 {
382 char *__restrict__ dst_device_ptr = (char *)dst_mem_id->mem_ptr;
383 assert (map->host_ptr);
384
385 if (map->host_ptr == (dst_device_ptr + map->offset))
386 NULL;
387 else
388 {
389 if (map->map_flags != CL_MAP_READ)
390 memcpy (dst_device_ptr + map->offset, map->host_ptr, map->size);
391 }
392
393 return CL_SUCCESS;
394 }
395
396 cl_int
pocl_driver_get_mapping_ptr(void * data,pocl_mem_identifier * mem_id,cl_mem mem,mem_mapping_t * map)397 pocl_driver_get_mapping_ptr (void *data, pocl_mem_identifier *mem_id,
398 cl_mem mem, mem_mapping_t *map)
399 {
400 char *__restrict__ src_device_ptr = (char *)mem_id->mem_ptr;
401 assert (mem->size > 0);
402 assert (map->size > 0);
403
404 if (mem->mem_host_ptr != NULL)
405 {
406 map->host_ptr = mem->mem_host_ptr + map->offset;
407 }
408 else
409 {
410 map->host_ptr = pocl_aligned_malloc (16, map->size);
411 }
412
413 assert (map->host_ptr);
414 return CL_SUCCESS;
415 }
416
417 cl_int
pocl_driver_free_mapping_ptr(void * data,pocl_mem_identifier * mem_id,cl_mem mem,mem_mapping_t * map)418 pocl_driver_free_mapping_ptr (void *data, pocl_mem_identifier *mem_id,
419 cl_mem mem, mem_mapping_t *map)
420 {
421 char *__restrict__ src_device_ptr = (char *)mem_id->mem_ptr;
422 if (map->host_ptr == NULL)
423 return CL_SUCCESS;
424
425 if ((mem->mem_host_ptr != NULL)
426 && map->host_ptr != (mem->mem_host_ptr + map->offset))
427 pocl_aligned_free (map->host_ptr);
428
429 map->host_ptr = NULL;
430 return CL_SUCCESS;
431 }
432
433 /* These are implementations of compilation callbacks for all devices
434 * that support compilation via LLVM. They take care of compilation/linking
435 * of source/binary/spir down to parallel.bc level.
436 *
437 * The driver only has to provide the "device->ops->compile_kernel" callback,
438 * which compiles parallel.bc to whatever final binary format is needed.
439 *
440 * Devices that support compilation by other means than LLVM,
441 * must reimplement these callbacks.
442 */
443
444 #ifdef ENABLE_LLVM
445 /* Converts SPIR to LLVM IR, and links it to pocl's kernel library. */
446 static int
pocl_llvm_link_and_convert_spir(cl_program program,cl_uint device_i,int link_program,int spir_build)447 pocl_llvm_link_and_convert_spir (cl_program program, cl_uint device_i,
448 int link_program, int spir_build)
449 {
450 cl_device_id device = program->devices[device_i];
451 int error;
452
453 /* SPIR-V was handled; bitcode is now either plain LLVM IR or SPIR IR */
454 int spir_binary
455 = bitcode_is_triple ((char *)program->binaries[device_i],
456 program->binary_sizes[device_i], "spir");
457 if (spir_binary)
458 POCL_MSG_PRINT_LLVM ("LLVM-SPIR binary detected\n");
459 else
460 POCL_MSG_PRINT_LLVM ("building from a BC binary for device %d\n",
461 device_i);
462
463 if (spir_binary)
464 {
465 #ifdef ENABLE_SPIR
466 if (!strstr (device->extensions, "cl_khr_spir"))
467 {
468 APPEND_TO_BUILD_LOG_RET (CL_LINK_PROGRAM_FAILURE,
469 "SPIR support is not available"
470 "for device %s\n",
471 device->short_name);
472 }
473 if (!spir_build)
474 POCL_MSG_WARN ("SPIR binary provided, but no spir in build options\n");
475
476 /* SPIR binaries need to be explicitly linked to the kernel
477 * library. For non-SPIR binaries this happens as part of build
478 * process when program.bc is generated. */
479 error = pocl_llvm_link_program (
480 program, device_i, 1, &program->binaries[device_i],
481 &program->binary_sizes[device_i], NULL, link_program, 1);
482
483 POCL_RETURN_ERROR_ON (error, CL_LINK_PROGRAM_FAILURE,
484 "Failed to link SPIR program.bc\n");
485 #else
486 APPEND_TO_BUILD_LOG_RET (CL_LINK_PROGRAM_FAILURE,
487 "SPIR support is not available"
488 "for device %s\n",
489 device->short_name);
490 #endif
491 }
492 return CL_SUCCESS;
493 }
494 #endif
495
496 int
pocl_driver_build_source(cl_program program,cl_uint device_i,cl_uint num_input_headers,const cl_program * input_headers,const char ** header_include_names,int link_program)497 pocl_driver_build_source (cl_program program, cl_uint device_i,
498 cl_uint num_input_headers,
499 const cl_program *input_headers,
500 const char **header_include_names, int link_program)
501 {
502 assert (program->devices[device_i]->compiler_available == CL_TRUE);
503 assert (program->devices[device_i]->linker_available == CL_TRUE);
504
505 #ifdef ENABLE_LLVM
506
507 POCL_MSG_PRINT_LLVM ("building from sources for device %d\n", device_i);
508
509 return pocl_llvm_build_program (program, device_i, num_input_headers,
510 input_headers, header_include_names,
511 link_program);
512
513 #else
514 POCL_RETURN_ERROR_ON (1, CL_BUILD_PROGRAM_FAILURE,
515 "This device requires LLVM to build from sources\n");
516 #endif
517 }
518
519 int
pocl_driver_build_binary(cl_program program,cl_uint device_i,int link_program,int spir_build)520 pocl_driver_build_binary (cl_program program, cl_uint device_i,
521 int link_program, int spir_build)
522 {
523
524 #ifdef ENABLE_LLVM
525 /* poclbinary doesn't need special handling */
526 if (program->pocl_binaries[device_i])
527 {
528 /* program.bc must be either NULL or unpacked by now */
529 if (program->binaries[device_i] == NULL)
530 POCL_MSG_WARN ("pocl-binary for this device doesn't contain "
531 "program.bc - you won't be able to rebuild it\n");
532 else
533 pocl_llvm_read_program_llvm_irs (program, device_i, NULL);
534 }
535 else /* program->binaries but not poclbinary */
536 {
537 assert (program->binaries[device_i]);
538 int err = pocl_llvm_link_and_convert_spir (program, device_i,
539 link_program, spir_build);
540 if (err != CL_SUCCESS)
541 return err;
542 pocl_llvm_read_program_llvm_irs (program, device_i, NULL);
543 }
544 return CL_SUCCESS;
545 #else
546 POCL_RETURN_ERROR_ON ((program->pocl_binaries[device_i] == NULL),
547 CL_BUILD_PROGRAM_FAILURE,
548 "This device requires LLVM to "
549 "build from SPIR/LLVM bitcode\n");
550 return CL_SUCCESS;
551 #endif
552 }
553
554 int
pocl_driver_link_program(cl_program program,cl_uint device_i,cl_uint num_input_programs,const cl_program * input_programs,int create_library)555 pocl_driver_link_program (cl_program program, cl_uint device_i,
556 cl_uint num_input_programs,
557 const cl_program *input_programs, int create_library)
558 {
559 assert (program->devices[device_i]->linker_available == CL_TRUE);
560
561 #ifdef ENABLE_LLVM
562 cl_device_id device = program->devices[device_i];
563 /* just link binaries. */
564 unsigned char **cur_device_binaries = (unsigned char **)alloca (
565 num_input_programs * sizeof (unsigned char *));
566 size_t *cur_device_binary_sizes
567 = (size_t *)alloca (num_input_programs * sizeof (size_t));
568 void **cur_device_llvm_irs
569 = (void **)alloca (num_input_programs * sizeof (void *));
570
571 cl_uint i;
572 for (i = 0; i < num_input_programs; i++)
573 {
574 assert (device == input_programs[i]->devices[device_i]);
575 POCL_LOCK_OBJ (input_programs[i]);
576
577 cur_device_binaries[i] = input_programs[i]->binaries[device_i];
578 assert (cur_device_binaries[i]);
579 cur_device_binary_sizes[i] = input_programs[i]->binary_sizes[device_i];
580 assert (cur_device_binary_sizes[i] > 0);
581
582 pocl_llvm_read_program_llvm_irs (input_programs[i], device_i, NULL);
583
584 cur_device_llvm_irs[i] = input_programs[i]->data[device_i];
585 assert (cur_device_llvm_irs[i]);
586 POCL_UNLOCK_OBJ (input_programs[i]);
587 }
588
589 int err = pocl_llvm_link_program (
590 program, device_i, num_input_programs, cur_device_binaries,
591 cur_device_binary_sizes, cur_device_llvm_irs, !create_library, 0);
592
593 POCL_RETURN_ERROR_ON ((err != CL_SUCCESS), CL_LINK_PROGRAM_FAILURE,
594 "This device requires LLVM to link binaries\n");
595 return CL_SUCCESS;
596 #else
597 POCL_RETURN_ERROR_ON (1, CL_BUILD_PROGRAM_FAILURE,
598 "This device cannot link anything\n");
599
600 #endif
601 }
602
603 int
pocl_driver_free_program(cl_device_id device,cl_program program,unsigned program_device_i)604 pocl_driver_free_program (cl_device_id device, cl_program program,
605 unsigned program_device_i)
606 {
607 #ifdef ENABLE_LLVM
608 pocl_llvm_free_llvm_irs (program, program_device_i);
609 #endif
610 return 0;
611 }
612
613 int
pocl_driver_setup_metadata(cl_device_id device,cl_program program,unsigned program_device_i)614 pocl_driver_setup_metadata (cl_device_id device, cl_program program,
615 unsigned program_device_i)
616 {
617 #ifdef ENABLE_LLVM
618 unsigned num_kernels
619 = pocl_llvm_get_kernel_count (program, program_device_i);
620
621 /* TODO zero kernels in program case */
622 if (num_kernels)
623 {
624 program->num_kernels = num_kernels;
625 program->kernel_meta
626 = calloc (program->num_kernels, sizeof (pocl_kernel_metadata_t));
627 pocl_llvm_get_kernels_metadata (program, program_device_i);
628 }
629 return 1;
630 #else
631 return 0;
632 #endif
633 }
634
635 int
pocl_driver_supports_binary(cl_device_id device,const size_t length,const char * binary)636 pocl_driver_supports_binary (cl_device_id device, const size_t length,
637 const char *binary)
638 {
639 #ifdef ENABLE_LLVM
640
641 /* SPIR binary is supported */
642 if (bitcode_is_triple (binary, length, "spir"))
643 {
644 POCL_RETURN_ERROR_ON (
645 (strstr (device->extensions, "cl_khr_spir") == NULL),
646 CL_BUILD_PROGRAM_FAILURE,
647 "SPIR binary provided, but device has no SPIR support");
648 return 1;
649 }
650
651 /* LLVM IR can be supported by the driver, if the triple matches */
652 if (device->llvm_target_triplet
653 && bitcode_is_triple (binary, length, device->llvm_target_triplet))
654 return 1;
655
656 POCL_MSG_ERR ("Unknown binary type.\n");
657 return 0;
658 #else
659 POCL_MSG_ERR (
660 "This driver was not build with LLVM support, so "
661 "don't support loading SPIR or LLVM IR binaries, only poclbinaries.\n");
662 return 0;
663 #endif
664 }
665
666 /* Build the dynamic WG sized parallel.bc and device specific code,
667 for each kernel. This must be called *after* metadata has been setup */
668 int
pocl_driver_build_poclbinary(cl_program program,cl_uint device_i)669 pocl_driver_build_poclbinary (cl_program program, cl_uint device_i)
670 {
671 unsigned i;
672 _cl_command_node cmd;
673 cl_device_id device = program->devices[device_i];
674
675 assert (program->build_status == CL_BUILD_SUCCESS);
676 if (program->num_kernels == 0)
677 return CL_SUCCESS;
678
679 /* For binaries of other than Executable type (libraries, compiled but
680 * not linked programs, etc), do not attempt to compile the kernels. */
681 if (program->binary_type != CL_PROGRAM_BINARY_TYPE_EXECUTABLE)
682 return CL_SUCCESS;
683
684 memset (&cmd, 0, sizeof (_cl_command_node));
685 cmd.type = CL_COMMAND_NDRANGE_KERNEL;
686
687 POCL_LOCK_OBJ (program);
688
689 assert (program->binaries[device_i]);
690
691 cmd.device = device;
692 cmd.device_i = device_i;
693
694 struct _cl_kernel fake_k;
695 memset (&fake_k, 0, sizeof (fake_k));
696 fake_k.context = program->context;
697 fake_k.program = program;
698 fake_k.next = NULL;
699 cl_kernel kernel = &fake_k;
700
701 for (i = 0; i < program->num_kernels; i++)
702 {
703 fake_k.meta = &program->kernel_meta[i];
704 fake_k.name = fake_k.meta->name;
705 cmd.command.run.hash = fake_k.meta->build_hash[device_i];
706
707 size_t local_x = 0, local_y = 0, local_z = 0;
708
709 if (kernel->meta->reqd_wg_size[0] > 0
710 && kernel->meta->reqd_wg_size[1] > 0
711 && kernel->meta->reqd_wg_size[2] > 0)
712 {
713 local_x = kernel->meta->reqd_wg_size[0];
714 local_y = kernel->meta->reqd_wg_size[1];
715 local_z = kernel->meta->reqd_wg_size[2];
716 }
717
718 cmd.command.run.pc.local_size[0] = local_x;
719 cmd.command.run.pc.local_size[1] = local_y;
720 cmd.command.run.pc.local_size[2] = local_z;
721
722 cmd.command.run.kernel = kernel;
723
724 cmd.command.run.pc.global_offset[0] = cmd.command.run.pc.global_offset[1]
725 = cmd.command.run.pc.global_offset[2] = 0;
726
727 /* Force generate a generic WG function to ensure generality. */
728 device->ops->compile_kernel (&cmd, kernel, device, 0);
729 /* Then generate a specialized one with goffset 0 since it's a very
730 common case. */
731 device->ops->compile_kernel (&cmd, kernel, device, 1);
732 }
733
734 POCL_UNLOCK_OBJ (program);
735
736 return CL_SUCCESS;
737 }
738