1 #include "device_interface.h"
2 #include "HalideRuntime.h"
3 #include "device_buffer_utils.h"
4 #include "printer.h"
5 #include "scoped_mutex_lock.h"
6
7 extern "C" {
8
9 extern void *malloc(size_t);
10 extern void free(void *);
11 }
12
13 namespace Halide {
14 namespace Runtime {
15 namespace Internal {
16
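// Pairs a device handle with the device interface that produced it.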
struct device_handle_wrapper {
    uint64_t device_handle;
    const halide_device_interface_t *interface;
};

// TODO: Coarser grained locking, also consider all things that need
// to be atomic with respect to each other. At present only
// halide_copy_to_host, halide_copy_to_device, and halide_buffer_copy
// are atomic with respect to each other. halide_device_malloc and
// halide_device_free are also candidates, but to do so they likely
// need to be able to do a copy internally as well.
WEAK halide_mutex device_copy_mutex;

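// Copy a buffer's device allocation back to its host allocation if the
// device side is dirty. The caller must already hold device_copy_mutex.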
WEAK int copy_to_host_already_locked(void *user_context, struct halide_buffer_t *buf) {
    if (!buf->device_dirty()) {
        return 0;  // my, that was easy
    }

    debug(user_context) << "copy_to_host_already_locked " << buf << " dev_dirty is true\n";
    const halide_device_interface_t *interface = buf->device_interface;
    if (buf->host_dirty()) {
        debug(user_context) << "copy_to_host_already_locked " << buf << " dev_dirty and host_dirty are true\n";
        return halide_error_code_copy_to_host_failed;
    }
    if (interface == NULL) {
        debug(user_context) << "copy_to_host_already_locked " << buf << " interface is NULL\n";
        return halide_error_code_no_device_interface;
    }
    int result = interface->impl->copy_to_host(user_context, buf);
    if (result != 0) {
        debug(user_context) << "copy_to_host_already_locked " << buf << " device copy_to_host returned an error\n";
        return halide_error_code_copy_to_host_failed;
    }
    buf->set_device_dirty(false);
    halide_msan_annotate_buffer_is_initialized(user_context, buf);

    return result;
}

} // namespace Internal
} // namespace Runtime
} // namespace Halide

namespace {

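// Log the buffer (in debug builds) and check the invariants every device API
// entry point relies on: the buffer is non-null, the device and
// device_interface fields are consistent, and host and device are not both
// marked dirty.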
ALWAYS_INLINE int debug_log_and_validate_buf(void *user_context, const halide_buffer_t *buf_arg,
                                             const char *routine) {
    if (buf_arg == NULL) {
        return halide_error_buffer_is_null(user_context, routine);
    }

    const halide_buffer_t &buf(*buf_arg);
    debug(user_context) << routine << " validating input buffer: " << buf << "\n";

    bool device_interface_set = (buf.device_interface != NULL);
    bool device_set = (buf.device != 0);
    if (device_set && !device_interface_set) {
        return halide_error_no_device_interface(user_context);
    }
    if (device_interface_set && !device_set) {
        return halide_error_device_interface_no_device(user_context);
    }

    bool host_dirty = buf.host_dirty();
    bool device_dirty = buf.device_dirty();
    if (host_dirty && device_dirty) {
        return halide_error_host_and_device_dirty(user_context);
    }
    /* TODO: we could test:
     * (device_set || !device_dirty)
     * and:
     * (buf.host != NULL || !host_dirty)
     * but these conditions can occur when freeing a buffer.
     * It is perhaps prudent to mandate resetting the dirty bit when freeing
     * the host field and setting it to nullptr; I am not convinced all code
     * does that at present. The same could occur on the device side, though
     * it is much less likely as halide_device_free does clear device_dirty.
     * At present we're taking the side of caution and not adding these to the
     * assertion.
     */
    return 0;
}

} // namespace

extern "C" {

/** Release all data associated with the current GPU backend, in particular
 * all resources (memory, texture, context handles) allocated by Halide. Must
 * be called explicitly when using AOT compilation. */
WEAK void halide_device_release(void *user_context, const halide_device_interface_t *device_interface) {
    device_interface->impl->device_release(user_context);
}

/** Copy image data from device memory to host memory. This must be called
 * explicitly to copy back the results of a GPU-based filter. */
WEAK int halide_copy_to_host(void *user_context, struct halide_buffer_t *buf) {
    ScopedMutexLock lock(&device_copy_mutex);

    int result = debug_log_and_validate_buf(user_context, buf, "halide_copy_to_host");
    if (result != 0) {
        return result;
    }

    return copy_to_host_already_locked(user_context, buf);
}

/** Copy image data from host memory to device memory. This should not be
 * called directly; Halide handles copying to the device automatically. */
WEAK int copy_to_device_already_locked(void *user_context,
                                       struct halide_buffer_t *buf,
                                       const halide_device_interface_t *device_interface) {
    int result = 0;

    result = debug_log_and_validate_buf(user_context, buf, "halide_copy_to_device");
    if (result != 0) {
        return result;
    }

    if (device_interface == NULL) {
        debug(user_context) << "halide_copy_to_device " << buf << " interface is NULL\n";
        if (buf->device_interface == NULL) {
            return halide_error_no_device_interface(user_context);
        }
        device_interface = buf->device_interface;
    }

    if (buf->device && buf->device_interface != device_interface) {
        halide_error(user_context, "halide_copy_to_device does not support switching interfaces\n");
        return halide_error_code_incompatible_device_interface;
    }

    if (buf->device == 0) {
        result = halide_device_malloc(user_context, buf, device_interface);
        if (result != 0) {
            debug(user_context) << "halide_copy_to_device " << buf
                                << " halide_copy_to_device call to halide_device_malloc failed\n";
            return result;
        }
    }

    if (buf->host_dirty()) {
        debug(user_context) << "halide_copy_to_device " << buf << " host is dirty\n";
        if (buf->device_dirty()) {
            debug(user_context) << "halide_copy_to_device " << buf << " dev_dirty is true error\n";
            return halide_error_code_copy_to_device_failed;
        } else {
            debug(user_context) << "halide_copy_to_device " << buf << " calling copy_to_device()\n";
            result = device_interface->impl->copy_to_device(user_context, buf);
            if (result == 0) {
                buf->set_host_dirty(false);
            } else {
                debug(user_context) << "halide_copy_to_device "
                                    << buf << " device copy_to_device returned an error\n";
                return halide_error_code_copy_to_device_failed;
            }
        }
    } else {
        debug(user_context) << "halide_copy_to_device " << buf << " skipped (host is not dirty)\n";
    }

    return 0;
}

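/** Public entry point for the host-to-device copy documented above: takes the
 * device copy lock and forwards to copy_to_device_already_locked. */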
WEAK int halide_copy_to_device(void *user_context,
                               struct halide_buffer_t *buf,
                               const halide_device_interface_t *device_interface) {
    ScopedMutexLock lock(&device_copy_mutex);
    return copy_to_device_already_locked(user_context, buf, device_interface);
}

/** Wait for current GPU operations to complete. Calling this explicitly
 * should rarely be necessary, except maybe for profiling. */
WEAK int halide_device_sync(void *user_context, struct halide_buffer_t *buf) {
    int result = debug_log_and_validate_buf(user_context, buf, "halide_device_sync");
    if (result != 0) {
        return result;
    }
    const halide_device_interface_t *device_interface = buf->device_interface;

    if (device_interface == NULL) {
        return halide_error_no_device_interface(user_context);
    }
    result = device_interface->impl->device_sync(user_context, buf);
    if (result) {
        return halide_error_code_device_sync_failed;
    } else {
        return 0;
    }
}

/** Allocate device memory to back a halide_buffer_t. */
WEAK int halide_device_malloc(void *user_context, struct halide_buffer_t *buf,
                              const halide_device_interface_t *device_interface) {
    int result = debug_log_and_validate_buf(user_context, buf, "halide_device_malloc");
    if (result != 0) {
        return result;
    }
    debug(user_context) << "halide_device_malloc: target device interface " << device_interface << "\n";

    const halide_device_interface_t *current_interface = buf->device_interface;

    // halide_device_malloc does not support switching interfaces.
    if (current_interface != NULL && current_interface != device_interface) {
        halide_error(user_context, "halide_device_malloc doesn't support switching interfaces\n");
        return halide_error_code_incompatible_device_interface;
    }

    // Ensure code is not freed prematurely.
    // TODO: Exception safety...
    device_interface->impl->use_module();
    result = device_interface->impl->device_malloc(user_context, buf);
    device_interface->impl->release_module();

    if (result) {
        return halide_error_code_device_malloc_failed;
    } else {
        return 0;
    }
}

/** Free any device memory associated with a halide_buffer_t. */
WEAK int halide_device_free(void *user_context, struct halide_buffer_t *buf) {
    int result = debug_log_and_validate_buf(user_context, buf, "halide_device_free");
    if (result != 0) {
        return result;
    }

    const halide_device_interface_t *device_interface = buf->device_interface;
    if (device_interface != NULL) {
        // Ensure interface is not freed prematurely.
        // TODO: Exception safety...
        device_interface->impl->use_module();
        result = device_interface->impl->device_free(user_context, buf);
        device_interface->impl->release_module();
        halide_assert(user_context, buf->device == 0);
        if (result) {
            return halide_error_code_device_free_failed;
        } else {
            return 0;
        }
    }
    buf->set_device_dirty(false);
    return 0;
}

/** Free any device memory associated with a halide_buffer_t and ignore any
 * error. Used when freeing as a destructor on an error. */
WEAK void halide_device_free_as_destructor(void *user_context, void *obj) {
    struct halide_buffer_t *buf = (struct halide_buffer_t *)obj;
    halide_device_free(user_context, buf);
}

/** Allocate host and device memory to back a halide_buffer_t. Ideally this
 * will be a zero copy setup, but the default implementation may
 * separately allocate the host memory using halide_malloc and the
 * device memory using halide_device_malloc. */
WEAK int halide_device_and_host_malloc(void *user_context, struct halide_buffer_t *buf,
                                       const halide_device_interface_t *device_interface) {
    int result = debug_log_and_validate_buf(user_context, buf, "halide_device_and_host_malloc");
    if (result != 0) {
        return result;
    }
    debug(user_context) << "halide_device_and_host_malloc: target device interface " << device_interface << "\n";

    const halide_device_interface_t *current_interface = buf->device_interface;

    // halide_device_and_host_malloc does not support switching interfaces.
    if (current_interface != NULL && current_interface != device_interface) {
        halide_error(user_context, "halide_device_and_host_malloc doesn't support switching interfaces\n");
        return halide_error_code_incompatible_device_interface;
    }

    // Ensure code is not freed prematurely.
    // TODO: Exception safety...
    device_interface->impl->use_module();
    result = device_interface->impl->device_and_host_malloc(user_context, buf);
    device_interface->impl->release_module();

    if (result != 0) {
        halide_error(user_context, "allocating host and device memory failed\n");
        return halide_error_code_device_malloc_failed;
    }
    return 0;
}

/** Free host and device memory associated with a halide_buffer_t. */
WEAK int halide_device_and_host_free(void *user_context, struct halide_buffer_t *buf) {
    int result = debug_log_and_validate_buf(user_context, buf, "halide_device_and_host_free");
    if (result != 0) {
        return result;
    }

    const halide_device_interface_t *device_interface = buf->device_interface;
    if (device_interface != NULL) {
        // Ensure interface is not freed prematurely.
        // TODO: Exception safety...
        device_interface->impl->use_module();
        result = device_interface->impl->device_and_host_free(user_context, buf);
        device_interface->impl->release_module();
        halide_assert(user_context, buf->device == 0);
        if (result) {
            return halide_error_code_device_free_failed;
        } else {
            return 0;
        }
    } else if (buf->host) {
        // device_free must have been called on this buffer (which
        // must be legal for the device interface that was
        // used). We'd better still free the host pointer.
        halide_free(user_context, buf->host);
        buf->host = NULL;
    }
    buf->set_device_dirty(false);
    return 0;
}

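/** Default implementation of halide_device_and_host_malloc: allocates the host
 * storage with halide_malloc and the device storage with halide_device_malloc,
 * freeing the host allocation again if the device allocation fails. */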
WEAK int halide_default_device_and_host_malloc(void *user_context, struct halide_buffer_t *buf,
                                               const halide_device_interface_t *device_interface) {
    int result = debug_log_and_validate_buf(user_context, buf, "halide_default_device_and_host_malloc");
    if (result != 0) {
        return result;
    }
    size_t size = buf->size_in_bytes();
    buf->host = (uint8_t *)halide_malloc(user_context, size);
    if (buf->host == NULL) {
        return -1;
    }
    result = halide_device_malloc(user_context, buf, device_interface);
    if (result != 0) {
        halide_free(user_context, buf->host);
        buf->host = NULL;
    }
    return result;
}

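/** Default implementation of halide_device_and_host_free: frees the device
 * allocation via halide_device_free and the host allocation via halide_free,
 * then clears both dirty flags. */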
WEAK int halide_default_device_and_host_free(void *user_context, struct halide_buffer_t *buf,
                                             const halide_device_interface_t *device_interface) {
    int result = debug_log_and_validate_buf(user_context, buf, "halide_default_device_and_host_free");
    if (result != 0) {
        return result;
    }
    result = halide_device_free(user_context, buf);
    if (buf->host) {
        halide_free(user_context, buf->host);
        buf->host = NULL;
    }
    buf->set_host_dirty(false);
    buf->set_device_dirty(false);
    return result;
}

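/** Wrap an existing device handle from the given device interface in a
 * halide_buffer_t. Does not support switching a buffer that already has an
 * allocation to a different device interface. */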
WEAK int halide_device_wrap_native(void *user_context, struct halide_buffer_t *buf, uint64_t handle,
                                   const halide_device_interface_t *device_interface) {
    int result = debug_log_and_validate_buf(user_context, buf, "halide_device_wrap_native");
    if (result != 0) {
        return result;
    }
    const halide_device_interface_t *current_interface = buf->device_interface;

    if (current_interface != NULL && current_interface != device_interface) {
        halide_error(user_context, "halide_device_wrap_native doesn't support switching interfaces\n");
        return halide_error_code_incompatible_device_interface;
    }

    device_interface->impl->use_module();
    buf->device_interface = device_interface;
    result = device_interface->impl->wrap_native(user_context, buf, handle);
    device_interface->impl->release_module();

    if (result) {
        return halide_error_code_device_malloc_failed;
    }
    return 0;
}

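/** Detach a previously wrapped native device handle from a halide_buffer_t,
 * leaving the buffer with no device allocation. */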
WEAK int halide_device_detach_native(void *user_context, struct halide_buffer_t *buf) {
    int result = debug_log_and_validate_buf(user_context, buf, "halide_device_detach_native");
    if (result != 0) {
        return result;
    }
    const halide_device_interface_t *device_interface = buf->device_interface;
    if (device_interface != NULL) {
        device_interface->impl->use_module();
        result = device_interface->impl->detach_native(user_context, buf);
        device_interface->impl->release_module();
        halide_assert(user_context, buf->device == 0);
        if (result) {
            result = halide_error_code_device_detach_native_failed;
        }
    }
    return result;
}

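/** Default implementation of wrap_native: stores the handle directly in the
 * buffer's device field. */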
WEAK int halide_default_device_wrap_native(void *user_context, struct halide_buffer_t *buf, uint64_t handle) {
    int result = debug_log_and_validate_buf(user_context, buf, "halide_default_device_wrap_native");
    if (result != 0) {
        return result;
    }
    buf->device_interface->impl->use_module();
    buf->device = handle;
    return 0;
}

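/** Default implementation of detach_native: clears the buffer's device and
 * device_interface fields and releases the module reference taken when the
 * handle was wrapped. */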
WEAK int halide_default_device_detach_native(void *user_context, struct halide_buffer_t *buf) {
    int result = debug_log_and_validate_buf(user_context, buf, "halide_default_device_detach_native");
    if (result != 0) {
        return result;
    }
    if (buf->device == 0) {
        return 0;
    }
    buf->device_interface->impl->release_module();
    buf->device = 0;
    buf->device_interface = NULL;
    return 0;
}

/** Free any host and device memory associated with a halide_buffer_t and ignore any
 * error. Used when freeing as a destructor on an error. */
WEAK void halide_device_and_host_free_as_destructor(void *user_context, void *obj) {
    struct halide_buffer_t *buf = (struct halide_buffer_t *)obj;
    halide_device_and_host_free(user_context, buf);
}

/** TODO: Find a way to elide host free without this hack. */
WEAK void halide_device_host_nop_free(void *user_context, void *obj) {
}

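/** Default implementation of buffer_copy. Device backends are expected to
 * provide their own implementation; this fallback always reports failure. */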
WEAK int halide_default_buffer_copy(void *user_context, struct halide_buffer_t *src,
                                    const struct halide_device_interface_t *dst_device_interface,
                                    struct halide_buffer_t *dst) {

    debug(user_context)
        << "halide_default_buffer_copy\n"
        << " source: " << *src << "\n"
        << " dst_device_interface: " << (void *)dst_device_interface << "\n"
        << " dst: " << *dst << "\n";

    // The right thing is that all devices have to support
    // device-to-device and device-to/from-arbitrary-pointer. This
    // means there will always have to be a device-specific version of
    // this function and the default can go away or fail. At present
    // there are some devices, e.g. OpenGL and OpenGLCompute, for which
    // this is not yet implemented.

    return halide_error_code_device_buffer_copy_failed;
}

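/** Implementation of halide_buffer_copy once device_copy_mutex is held: copies
 * src to dst, which may be on different devices or on the host, bouncing the
 * data through host memory if a direct device-to-device copy is not
 * supported. */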
WEAK int halide_buffer_copy_already_locked(void *user_context, struct halide_buffer_t *src,
                                           const struct halide_device_interface_t *dst_device_interface,
                                           struct halide_buffer_t *dst) {
    debug(user_context) << "halide_buffer_copy_already_locked called.\n";
    int err = 0;

    if (dst_device_interface && dst->device_interface &&
        dst_device_interface != dst->device_interface) {
        halide_error(user_context, "halide_buffer_copy does not support switching device interfaces");
        return halide_error_code_incompatible_device_interface;
    }

    if (dst_device_interface && !dst->device) {
        debug(user_context) << "halide_buffer_copy_already_locked: calling halide_device_malloc.\n";
        err = halide_device_malloc(user_context, dst, dst_device_interface);
        if (err) {
            return err;
        }
    }

    // First goal is correctness, the more interesting parts of which are:
    // 1) Respect dirty bits so data is valid.
    // 2) Don't infinitely recurse.
    // Second goal is efficiency:
    // 1) Try to do device-to-device if possible
    // 2) Minimum number of copies and minimum amount of copying otherwise.
    // 2a) e.g. for a device to different device buffer copy call where the copy must
    //     go through host memory, the src buffer may be left in device dirty state
    //     with the data copied through the destination host buffer to reduce the size
    //     of the copy.
    // The device-specific runtime routine may return an error for the
    // device to device case with separate devices. This code will attempt
    // to decompose the call via bouncing through host memory.
    //
    // At present some cases, such as different devices where there is
    // no host buffer, will return an error. Some of these could be
    // handled by allocating temporary host memory.
    //
    // It is assumed that if two device runtimes have copy compatible buffers
    // both will handle a copy between their types of buffers.

    // Give more descriptive names to conditions.
    const bool from_device_valid = (src->device != 0) &&
                                   (src->host == NULL || !src->host_dirty());
    const bool to_device = dst_device_interface != NULL;
    const bool to_host = dst_device_interface == NULL;
    const bool from_host_exists = src->host != NULL;
    const bool from_host_valid = from_host_exists &&
                                 (!src->device_dirty() || (src->device_interface == NULL));
    const bool to_host_exists = dst->host != NULL;

    if (to_host && !to_host_exists) {
        return halide_error_code_host_is_null;
    }

    // If a device to device copy is requested, try to do it directly.
    err = halide_error_code_incompatible_device_interface;
    if (from_device_valid && to_device) {
        debug(user_context) << "halide_buffer_copy_already_locked: device to device case.\n";
        err = dst_device_interface->impl->buffer_copy(user_context, src, dst_device_interface, dst);
    }

    if (err == halide_error_code_incompatible_device_interface) {
        // Return an error for a case that cannot make progress without a temporary allocation.
        // TODO: go ahead and do the temp allocation.
        if (!from_host_exists && !to_host_exists) {
            debug(user_context) << "halide_buffer_copy_already_locked: failing due to need for temp buffer.\n";
            return halide_error_code_incompatible_device_interface;
        }

        if (to_host && from_host_valid) {
            device_copy c = make_buffer_copy(src, true, dst, true);
            copy_memory(c, user_context);
            err = 0;
        } else if (to_host) {
            debug(user_context) << "halide_buffer_copy_already_locked: to host case.\n";
            err = src->device_interface->impl->buffer_copy(user_context, src, NULL, dst);
            // Return on success or an error indicating something other
            // than not handling this case went wrong.
            if (err == halide_error_code_incompatible_device_interface) {
                err = copy_to_host_already_locked(user_context, src);
                if (!err) {
                    err = halide_buffer_copy_already_locked(user_context, src, NULL, dst);
                }
            }
        } else {
            if (from_device_valid && to_host_exists) {
                debug(user_context) << "halide_buffer_copy_already_locked: from_device_valid && to_host_exists case.\n";
                // dev -> dev via dst host memory
                debug(user_context) << " device -> device via dst host memory\n";
                err = src->device_interface->impl->buffer_copy(user_context, src, NULL, dst);
                if (err == 0) {
                    dst->set_host_dirty(true);
                    err = copy_to_device_already_locked(user_context, dst, dst_device_interface);
                }
            } else {
                debug(user_context) << "halide_buffer_copy_already_locked: dev -> dev via src host memory.\n";
                // dev -> dev via src host memory.
                err = copy_to_host_already_locked(user_context, src);
                if (err == 0) {
                    err = dst_device_interface->impl->buffer_copy(user_context, src, dst_device_interface, dst);
                }
            }
        }
    }

    if (err != 0) {
        debug(user_context) << "halide_buffer_copy_already_locked: got error " << err << ".\n";
    }
    if (err == 0 && dst != src) {
        if (dst_device_interface) {
            debug(user_context) << "halide_buffer_copy_already_locked: setting device dirty.\n";
            dst->set_host_dirty(false);
            dst->set_device_dirty(true);
        } else {
            debug(user_context) << "halide_buffer_copy_already_locked: setting host dirty.\n";
            dst->set_host_dirty(true);
            dst->set_device_dirty(false);
        }
    }

    return err;
}

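/** Copy image data from one halide_buffer_t to another, possibly across device
 * interfaces. Takes the device copy lock and keeps both device modules alive
 * for the duration of the copy. */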
WEAK int halide_buffer_copy(void *user_context, struct halide_buffer_t *src,
                            const struct halide_device_interface_t *dst_device_interface,
                            struct halide_buffer_t *dst) {
    debug(user_context) << "halide_buffer_copy:\n"
                        << " src " << *src << "\n"
                        << " interface " << dst_device_interface << "\n"
                        << " dst " << *dst << "\n";

    ScopedMutexLock lock(&device_copy_mutex);

    if (dst_device_interface) {
        dst_device_interface->impl->use_module();
    }
    if (src->device_interface) {
        src->device_interface->impl->use_module();
    }

    int err = halide_buffer_copy_already_locked(user_context, src, dst_device_interface, dst);

    if (dst_device_interface) {
        dst_device_interface->impl->release_module();
    }
    if (src->device_interface) {
        src->device_interface->impl->release_module();
    }

    return err;
}

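/** Default implementation of device_crop, for backends that do not support
 * cropping device allocations. */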
WEAK int halide_default_device_crop(void *user_context,
                                    const struct halide_buffer_t *src,
                                    struct halide_buffer_t *dst) {
    halide_error(user_context, "device_interface does not support cropping\n");
    return halide_error_code_device_crop_unsupported;
}

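/** Default implementation of device_slice, for backends that do not support
 * slicing device allocations. */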
WEAK int halide_default_device_slice(void *user_context,
                                     const struct halide_buffer_t *src,
                                     int slice_dim, int slice_pos,
                                     struct halide_buffer_t *dst) {
    halide_error(user_context, "device_interface does not support slicing\n");
    return halide_error_code_device_crop_unsupported;
}

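/** Give the destination buffer a device allocation that is a crop of the
 * source buffer's device allocation. src and dst must have the same
 * dimensionality, and dst must not already have a device allocation. */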
WEAK int halide_device_crop(void *user_context,
                            const struct halide_buffer_t *src,
                            struct halide_buffer_t *dst) {
    ScopedMutexLock lock(&device_copy_mutex);

    if (!src->device) {
        return 0;
    }

    if (dst->device) {
        halide_error(user_context, "destination buffer already has a device allocation\n");
        return halide_error_code_device_crop_failed;
    }

    if (src->dimensions != dst->dimensions) {
        halide_error(user_context, "src and dst must have identical dimensionality\n");
        return halide_error_code_device_crop_failed;
    }

    src->device_interface->impl->use_module();
    int err = src->device_interface->impl->device_crop(user_context, src, dst);

    debug(user_context) << "halide_device_crop "
                        << "\n"
                        << " src: " << *src << "\n"
                        << " dst: " << *dst << "\n";

    return err;
}

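/** Give the destination buffer a device allocation that is a slice of the
 * source buffer's device allocation along slice_dim at slice_pos. dst must
 * have exactly one fewer dimension than src and must not already have a
 * device allocation. */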
WEAK int halide_device_slice(void *user_context,
                             const struct halide_buffer_t *src,
                             int slice_dim, int slice_pos,
                             struct halide_buffer_t *dst) {
    ScopedMutexLock lock(&device_copy_mutex);

    if (!src->device) {
        return 0;
    }

    if (dst->device) {
        halide_error(user_context, "destination buffer already has a device allocation\n");
        return halide_error_code_device_crop_failed;
    }

    if (src->dimensions != dst->dimensions + 1) {
        halide_error(user_context, "dst must have exactly one fewer dimension than src\n");
        return halide_error_code_device_crop_failed;
    }

    src->device_interface->impl->use_module();
    int err = src->device_interface->impl->device_slice(user_context, src, slice_dim, slice_pos, dst);

    debug(user_context) << "halide_device_slice "
                        << "\n"
                        << " src: " << *src << "\n"
                        << " dst: " << *dst << "\n";

    return err;
}

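/** Default implementation of device_release_crop, for backends that do not
 * support cropping device allocations. */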
WEAK int halide_default_device_release_crop(void *user_context,
                                            struct halide_buffer_t *buf) {
    if (!buf->device) {
        return 0;
    }
    halide_error(user_context, "device_interface does not support cropping\n");
    return halide_error_code_device_crop_unsupported;
}

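/** Release a device allocation created by halide_device_crop or
 * halide_device_slice, dropping the module reference taken when the crop or
 * slice was made. */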
WEAK int halide_device_release_crop(void *user_context,
                                    struct halide_buffer_t *buf) {
    if (buf->device) {
        ScopedMutexLock lock(&device_copy_mutex);
        const struct halide_device_interface_t *interface = buf->device_interface;
        int result = interface->impl->device_release_crop(user_context, buf);
        buf->device = 0;
        interface->impl->release_module();
        buf->device_interface = NULL;
        return result;
    }
    return 0;
}

} // extern "C" linkage