1 #include "device_interface.h"
2 #include "HalideRuntime.h"
3 #include "device_buffer_utils.h"
4 #include "printer.h"
5 #include "scoped_mutex_lock.h"
6 
7 extern "C" {
8 
9 extern void *malloc(size_t);
10 extern void free(void *);
11 }
12 
13 namespace Halide {
14 namespace Runtime {
15 namespace Internal {
16 
// Pairs a raw device handle with the interface that produced it, so the
// handle can later be released through the correct backend.
// NOTE(review): not referenced in the visible portion of this file —
// presumably used by wrap/detach code elsewhere in the runtime.
struct device_handle_wrapper {
    uint64_t device_handle;                      // backend-specific handle value
    const halide_device_interface_t *interface;  // backend that owns the handle
};
21 
22 // TODO: Coarser grained locking, also consider all things that need
23 // to be atomic with respect to each other. At present only
24 // halide_copy_to_host, halide_copy_to_device, and halide_buffer_copy
25 // are atomic with respect to each other. halide_device_malloc and
26 // halide_device_free are also candidates, but to do so they likely
// need to be able to do a copy internally as well.
28 WEAK halide_mutex device_copy_mutex;
29 
copy_to_host_already_locked(void * user_context,struct halide_buffer_t * buf)30 WEAK int copy_to_host_already_locked(void *user_context, struct halide_buffer_t *buf) {
31     if (!buf->device_dirty()) {
32         return 0;  // my, that was easy
33     }
34 
35     debug(user_context) << "copy_to_host_already_locked " << buf << " dev_dirty is true\n";
36     const halide_device_interface_t *interface = buf->device_interface;
37     if (buf->host_dirty()) {
38         debug(user_context) << "copy_to_host_already_locked " << buf << " dev_dirty and host_dirty are true\n";
39         return halide_error_code_copy_to_host_failed;
40     }
41     if (interface == NULL) {
42         debug(user_context) << "copy_to_host_already_locked " << buf << " interface is NULL\n";
43         return halide_error_code_no_device_interface;
44     }
45     int result = interface->impl->copy_to_host(user_context, buf);
46     if (result != 0) {
47         debug(user_context) << "copy_to_host_already_locked " << buf << " device copy_to_host returned an error\n";
48         return halide_error_code_copy_to_host_failed;
49     }
50     buf->set_device_dirty(false);
51     halide_msan_annotate_buffer_is_initialized(user_context, buf);
52 
53     return result;
54 }
55 
56 }  // namespace Internal
57 }  // namespace Runtime
58 }  // namespace Halide
59 
60 namespace {
61 
debug_log_and_validate_buf(void * user_context,const halide_buffer_t * buf_arg,const char * routine)62 ALWAYS_INLINE int debug_log_and_validate_buf(void *user_context, const halide_buffer_t *buf_arg,
63                                              const char *routine) {
64     if (buf_arg == NULL) {
65         return halide_error_buffer_is_null(user_context, routine);
66     }
67 
68     const halide_buffer_t &buf(*buf_arg);
69     debug(user_context) << routine << " validating input buffer: " << buf << "\n";
70 
71     bool device_interface_set = (buf.device_interface != NULL);
72     bool device_set = (buf.device != 0);
73     if (device_set && !device_interface_set) {
74         return halide_error_no_device_interface(user_context);
75     }
76     if (device_interface_set && !device_set) {
77         return halide_error_device_interface_no_device(user_context);
78     }
79 
80     bool host_dirty = buf.host_dirty();
81     bool device_dirty = buf.device_dirty();
82     if (host_dirty && device_dirty) {
83         return halide_error_host_and_device_dirty(user_context);
84     }
85     /* TODO: we could test:
86      *     (device_set || !device_dirty)
87      * and:
88      *     (buf.host != NULL || !host_dirty)
89      * but these conditions can occur when freeing a buffer.
90      * It is perhaps prudent to mandate reseting the dirty bit when freeing
91      * the host field and setting it to nullptr, I am not convinced all code
92      * does that at present. The same could occur on the device side, though
93      * it is much more unlikely as halide_device_free does clear device_dirty.
94      * At present we're taking the side of caution and not adding these to the
95      * assertion.
96      */
97     return 0;
98 }
99 
100 }  // namespace
101 
102 extern "C" {
103 
/** Release all data associated with the current GPU backend, in particular
 * all resources (memory, texture, context handles) allocated by Halide. Must
 * be called explicitly when using AOT compilation.
 * NOTE(review): device_interface is dereferenced unchecked — callers must
 * pass a non-NULL interface. */
WEAK void halide_device_release(void *user_context, const halide_device_interface_t *device_interface) {
    device_interface->impl->device_release(user_context);
}
110 
111 /** Copy image data from device memory to host memory. This must be called
112  * explicitly to copy back the results of a GPU-based filter. */
halide_copy_to_host(void * user_context,struct halide_buffer_t * buf)113 WEAK int halide_copy_to_host(void *user_context, struct halide_buffer_t *buf) {
114     ScopedMutexLock lock(&device_copy_mutex);
115 
116     int result = debug_log_and_validate_buf(user_context, buf, "halide_copy_to_host");
117     if (result != 0) {
118         return result;
119     }
120 
121     return copy_to_host_already_locked(user_context, buf);
122 }
123 
/** Copy image data from host memory to device memory. This should not be
 * called directly; Halide handles copying to the device automatically.
 * Caller must hold device_copy_mutex (halide_copy_to_device is the locking
 * wrapper). If device_interface is NULL the buffer's own interface is used;
 * it is an error if neither is available. Returns 0 on success or a
 * halide_error_code_t value on failure. */
WEAK int copy_to_device_already_locked(void *user_context,
                                       struct halide_buffer_t *buf,
                                       const halide_device_interface_t *device_interface) {
    int result = 0;

    result = debug_log_and_validate_buf(user_context, buf, "halide_copy_to_device");
    if (result != 0) {
        return result;
    }

    if (device_interface == NULL) {
        debug(user_context) << "halide_copy_to_device " << buf << " interface is NULL\n";
        if (buf->device_interface == NULL) {
            return halide_error_no_device_interface(user_context);
        }
        // Fall back to the interface already associated with the buffer.
        device_interface = buf->device_interface;
    }

    // Moving a buffer between device APIs is not supported here.
    if (buf->device && buf->device_interface != device_interface) {
        halide_error(user_context, "halide_copy_to_device does not support switching interfaces\n");
        return halide_error_code_incompatible_device_interface;
    }

    // Lazily allocate the device-side storage on first use.
    if (buf->device == 0) {
        result = halide_device_malloc(user_context, buf, device_interface);
        if (result != 0) {
            debug(user_context) << "halide_copy_to_device " << buf
                                << " halide_copy_to_device call to halide_device_malloc failed\n";
            return result;
        }
    }

    if (buf->host_dirty()) {
        debug(user_context) << "halide_copy_to_device " << buf << " host is dirty\n";
        if (buf->device_dirty()) {
            // Both copies claiming to be newest is a contract violation
            // (debug_log_and_validate_buf also rejects this state).
            debug(user_context) << "halide_copy_to_device " << buf << " dev_dirty is true error\n";
            return halide_error_code_copy_to_device_failed;
        } else {
            debug(user_context) << "halide_copy_to_device " << buf << " calling copy_to_device()\n";
            result = device_interface->impl->copy_to_device(user_context, buf);
            if (result == 0) {
                // Device now holds the freshest data.
                buf->set_host_dirty(false);
            } else {
                debug(user_context) << "halide_copy_to_device "
                                    << buf << "device copy_to_device returned an error\n";
                return halide_error_code_copy_to_device_failed;
            }
        }
    } else {
        // Host copy is not newer than the device's: nothing to transfer.
        debug(user_context) << "halide_copy_to_device " << buf << " skipped (host is not dirty)\n";
    }

    return 0;
}
180 
halide_copy_to_device(void * user_context,struct halide_buffer_t * buf,const halide_device_interface_t * device_interface)181 WEAK int halide_copy_to_device(void *user_context,
182                                struct halide_buffer_t *buf,
183                                const halide_device_interface_t *device_interface) {
184     ScopedMutexLock lock(&device_copy_mutex);
185     return copy_to_device_already_locked(user_context, buf, device_interface);
186 }
187 
188 /** Wait for current GPU operations to complete. Calling this explicitly
189  * should rarely be necessary, except maybe for profiling. */
halide_device_sync(void * user_context,struct halide_buffer_t * buf)190 WEAK int halide_device_sync(void *user_context, struct halide_buffer_t *buf) {
191     int result = debug_log_and_validate_buf(user_context, buf, "halide_device_sync");
192     if (result != 0) {
193         return result;
194     }
195     const halide_device_interface_t *device_interface = buf->device_interface;
196 
197     if (device_interface == NULL) {
198         return halide_error_no_device_interface(user_context);
199     }
200     result = device_interface->impl->device_sync(user_context, buf);
201     if (result) {
202         return halide_error_code_device_sync_failed;
203     } else {
204         return 0;
205     }
206 }
207 
208 /** Allocate device memory to back a halide_buffer_t. */
halide_device_malloc(void * user_context,struct halide_buffer_t * buf,const halide_device_interface_t * device_interface)209 WEAK int halide_device_malloc(void *user_context, struct halide_buffer_t *buf,
210                               const halide_device_interface_t *device_interface) {
211     int result = debug_log_and_validate_buf(user_context, buf, "halide_device_malloc");
212     if (result != 0) {
213         return result;
214     }
215     debug(user_context) << "halide_device_malloc: target device interface " << device_interface << "\n";
216 
217     const halide_device_interface_t *current_interface = buf->device_interface;
218 
219     // halide_device_malloc does not support switching interfaces.
220     if (current_interface != NULL && current_interface != device_interface) {
221         halide_error(user_context, "halide_device_malloc doesn't support switching interfaces\n");
222         return halide_error_code_incompatible_device_interface;
223     }
224 
225     // Ensure code is not freed prematurely.
226     // TODO: Exception safety...
227     device_interface->impl->use_module();
228     result = device_interface->impl->device_malloc(user_context, buf);
229     device_interface->impl->release_module();
230 
231     if (result) {
232         return halide_error_code_device_malloc_failed;
233     } else {
234         return 0;
235     }
236 }
237 
/** Free any device memory associated with a halide_buffer_t.
 * Returns 0 on success (including when there is nothing to free), or
 * halide_error_code_device_free_failed if the backend reports an error. */
WEAK int halide_device_free(void *user_context, struct halide_buffer_t *buf) {
    int result = debug_log_and_validate_buf(user_context, buf, "halide_device_free");
    if (result != 0) {
        return result;
    }

    const halide_device_interface_t *device_interface = buf->device_interface;
    if (device_interface != NULL) {
        // Pin the interface's module so it cannot be unloaded mid-call.
        // TODO: Exception safety...
        device_interface->impl->use_module();
        result = device_interface->impl->device_free(user_context, buf);
        device_interface->impl->release_module();
        // A conforming device_free implementation must have zeroed
        // buf->device (and is expected to clear device_dirty itself).
        halide_assert(user_context, buf->device == 0);
        if (result) {
            return halide_error_code_device_free_failed;
        } else {
            return 0;
        }
    }
    // No device allocation: just make sure the dirty flag is not left set.
    buf->set_device_dirty(false);
    return 0;
}
262 
263 /** Free any device memory associated with a halide_buffer_t and ignore any
264  * error. Used when freeing as a destructor on an error. */
halide_device_free_as_destructor(void * user_context,void * obj)265 WEAK void halide_device_free_as_destructor(void *user_context, void *obj) {
266     struct halide_buffer_t *buf = (struct halide_buffer_t *)obj;
267     halide_device_free(user_context, buf);
268 }
269 
270 /** Allocate host and device memory to back a halide_buffer_t. Ideally this
271  * will be a zero copy setup, but the default implementation may
272  * separately allocate the host memory using halide_malloc and the
273  * device memory using halide_device_malloc. */
halide_device_and_host_malloc(void * user_context,struct halide_buffer_t * buf,const halide_device_interface_t * device_interface)274 WEAK int halide_device_and_host_malloc(void *user_context, struct halide_buffer_t *buf,
275                                        const halide_device_interface_t *device_interface) {
276     int result = debug_log_and_validate_buf(user_context, buf, "halide_device_and_host_malloc");
277     if (result != 0) {
278         return result;
279     }
280     debug(user_context) << "halide_device_and_host_malloc: target device interface " << device_interface << "\n";
281 
282     const halide_device_interface_t *current_interface = buf->device_interface;
283 
284     // halide_device_malloc does not support switching interfaces.
285     if (current_interface != NULL && current_interface != device_interface) {
286         halide_error(user_context, "halide_device_and_host_malloc doesn't support switching interfaces\n");
287         return halide_error_code_incompatible_device_interface;
288     }
289 
290     // Ensure code is not freed prematurely.
291     // TODO: Exception safety...
292     device_interface->impl->use_module();
293     result = device_interface->impl->device_and_host_malloc(user_context, buf);
294     device_interface->impl->release_module();
295 
296     if (result != 0) {
297         halide_error(user_context, "allocating host and device memory failed\n");
298         return halide_error_code_device_malloc_failed;
299     }
300     return 0;
301 }
302 
/** Free host and device memory associated with a halide_buffer_t.
 * When the buffer has a device interface, the combined free is delegated
 * to the backend. When it does not (device_free was already called), any
 * remaining host allocation is still released here. */
WEAK int halide_device_and_host_free(void *user_context, struct halide_buffer_t *buf) {
    int result = debug_log_and_validate_buf(user_context, buf, "halide_device_and_host_free");
    if (result != 0) {
        return result;
    }

    const halide_device_interface_t *device_interface = buf->device_interface;
    if (device_interface != NULL) {
        // Pin the interface's module so it cannot be unloaded mid-call.
        // TODO: Exception safety...
        device_interface->impl->use_module();
        result = device_interface->impl->device_and_host_free(user_context, buf);
        device_interface->impl->release_module();
        // A conforming implementation must have zeroed buf->device.
        halide_assert(user_context, buf->device == 0);
        if (result) {
            return halide_error_code_device_free_failed;
        } else {
            return 0;
        }
    } else if (buf->host) {
        // device_free must have been called on this buffer (which
        // must be legal for the device interface that was
        // used). We'd better still free the host pointer.
        halide_free(user_context, buf->host);
        buf->host = NULL;
    }
    // Nothing remains that could be dirty.
    buf->set_device_dirty(false);
    return 0;
}
333 
halide_default_device_and_host_malloc(void * user_context,struct halide_buffer_t * buf,const halide_device_interface_t * device_interface)334 WEAK int halide_default_device_and_host_malloc(void *user_context, struct halide_buffer_t *buf,
335                                                const halide_device_interface_t *device_interface) {
336     int result = debug_log_and_validate_buf(user_context, buf, "halide_default_device_and_host_malloc");
337     if (result != 0) {
338         return result;
339     }
340     size_t size = buf->size_in_bytes();
341     buf->host = (uint8_t *)halide_malloc(user_context, size);
342     if (buf->host == NULL) {
343         return -1;
344     }
345     result = halide_device_malloc(user_context, buf, device_interface);
346     if (result != 0) {
347         halide_free(user_context, buf->host);
348         buf->host = NULL;
349     }
350     return result;
351 }
352 
halide_default_device_and_host_free(void * user_context,struct halide_buffer_t * buf,const halide_device_interface_t * device_interface)353 WEAK int halide_default_device_and_host_free(void *user_context, struct halide_buffer_t *buf,
354                                              const halide_device_interface_t *device_interface) {
355     int result = debug_log_and_validate_buf(user_context, buf, "halide_default_device_and_host_free");
356     if (result != 0) {
357         return result;
358     }
359     result = halide_device_free(user_context, buf);
360     if (buf->host) {
361         halide_free(user_context, buf->host);
362         buf->host = NULL;
363     }
364     buf->set_host_dirty(false);
365     buf->set_device_dirty(false);
366     return result;
367 }
368 
/** Wrap an existing backend-native handle in buf via the interface's
 * wrap_native implementation. Does not support moving a buffer between
 * interfaces. Returns 0 on success or a halide_error_code_t value. */
WEAK int halide_device_wrap_native(void *user_context, struct halide_buffer_t *buf, uint64_t handle,
                                   const halide_device_interface_t *device_interface) {
    int result = debug_log_and_validate_buf(user_context, buf, "halide_device_wrap_native");
    if (result != 0) {
        return result;
    }
    const halide_device_interface_t *current_interface = buf->device_interface;

    if (current_interface != NULL && current_interface != device_interface) {
        halide_error(user_context, "halide_device_wrap_native doesn't support switching interfaces\n");
        return halide_error_code_incompatible_device_interface;
    }

    // Pin the module across the call; the wrap_native implementation
    // (e.g. halide_default_device_wrap_native) takes its own longer-lived
    // reference that detach_native later drops.
    device_interface->impl->use_module();
    // Interface is assigned before the call so the impl can rely on it.
    buf->device_interface = device_interface;
    result = device_interface->impl->wrap_native(user_context, buf, handle);
    device_interface->impl->release_module();

    if (result) {
        // NOTE(review): buf->device_interface is left pointing at
        // device_interface even on failure — confirm callers tolerate
        // this before relying on it.
        return halide_error_code_device_malloc_failed;
    }
    return 0;
}
392 
halide_device_detach_native(void * user_context,struct halide_buffer_t * buf)393 WEAK int halide_device_detach_native(void *user_context, struct halide_buffer_t *buf) {
394     int result = debug_log_and_validate_buf(user_context, buf, "halide_device_detach_native");
395     if (result != 0) {
396         return result;
397     }
398     const halide_device_interface_t *device_interface = buf->device_interface;
399     if (device_interface != NULL) {
400         device_interface->impl->use_module();
401         result = device_interface->impl->detach_native(user_context, buf);
402         device_interface->impl->release_module();
403         halide_assert(user_context, buf->device == 0);
404         if (result) {
405             result = halide_error_code_device_detach_native_failed;
406         }
407     }
408     return result;
409 }
410 
/** Default wrap_native implementation: record the handle and take a
 * module reference that halide_default_device_detach_native later drops.
 * Assumes buf->device_interface was already set by the caller
 * (halide_device_wrap_native does this) — NOTE(review): it is
 * dereferenced unchecked here. */
WEAK int halide_default_device_wrap_native(void *user_context, struct halide_buffer_t *buf, uint64_t handle) {
    int result = debug_log_and_validate_buf(user_context, buf, "halide_default_device_wrap_native");
    if (result != 0) {
        return result;
    }
    // Deliberately unbalanced use_module: the matching release_module
    // happens in halide_default_device_detach_native.
    buf->device_interface->impl->use_module();
    buf->device = handle;
    return 0;
}
420 
halide_default_device_detach_native(void * user_context,struct halide_buffer_t * buf)421 WEAK int halide_default_device_detach_native(void *user_context, struct halide_buffer_t *buf) {
422     int result = debug_log_and_validate_buf(user_context, buf, "halide_default_device_detach_native");
423     if (result != 0) {
424         return result;
425     }
426     if (buf->device == 0) {
427         return 0;
428     }
429     buf->device_interface->impl->release_module();
430     buf->device = 0;
431     buf->device_interface = NULL;
432     return 0;
433 }
434 
435 /** Free any host and device memory associated with a halide_buffer_t and ignore any
436  * error. Used when freeing as a destructor on an error. */
halide_device_and_host_free_as_destructor(void * user_context,void * obj)437 WEAK void halide_device_and_host_free_as_destructor(void *user_context, void *obj) {
438     struct halide_buffer_t *buf = (struct halide_buffer_t *)obj;
439     halide_device_and_host_free(user_context, buf);
440 }
441 
/** TODO: Find a way to elide host free without this hack. */
WEAK void halide_device_host_nop_free(void *user_context, void *obj) {
    // Intentionally empty: used as a destructor callback when the host
    // pointer must not actually be freed.
}
445 
halide_default_buffer_copy(void * user_context,struct halide_buffer_t * src,const struct halide_device_interface_t * dst_device_interface,struct halide_buffer_t * dst)446 WEAK int halide_default_buffer_copy(void *user_context, struct halide_buffer_t *src,
447                                     const struct halide_device_interface_t *dst_device_interface,
448                                     struct halide_buffer_t *dst) {
449 
450     debug(user_context)
451         << "halide_default_buffer_copy\n"
452         << " source: " << *src << "\n"
453         << " dst_device_interface: " << (void *)dst_device_interface << "\n"
454         << " dst: " << *dst << "\n";
455 
456     // The right thing is that all devices have to support
457     // device-to-device and device-to/from-arbitrarty-pointer.  This
458     // means there will always have to be a device specifc version of
459     // this function and the default can go away or fail. At present
460     // there are some devices, e.g. OpenGL and OpenGLCompute, for which
461     // this is not yet implemented.
462 
463     return halide_error_code_device_buffer_copy_failed;
464 }
465 
/** Core of halide_buffer_copy; caller must hold device_copy_mutex and
 * have pinned the relevant device modules. Copies src to dst, routing
 * through host memory when a direct device-to-device copy is not
 * possible, and updates dst's dirty bits on success. Returns 0 or a
 * halide_error_code_t value. */
WEAK int halide_buffer_copy_already_locked(void *user_context, struct halide_buffer_t *src,
                                           const struct halide_device_interface_t *dst_device_interface,
                                           struct halide_buffer_t *dst) {
    debug(user_context) << "halide_buffer_copy_already_locked called.\n";
    int err = 0;

    if (dst_device_interface && dst->device_interface &&
        dst_device_interface != dst->device_interface) {
        halide_error(user_context, "halide_buffer_copy does not support switching device interfaces");
        return halide_error_code_incompatible_device_interface;
    }

    // Lazily allocate the destination's device storage if needed.
    if (dst_device_interface && !dst->device) {
        debug(user_context) << "halide_buffer_copy_already_locked: calling halide_device_malloc.\n";
        err = halide_device_malloc(user_context, dst, dst_device_interface);
        if (err) {
            return err;
        }
    }

    // First goal is correctness, the more interesting parts of which are:
    //      1) Respect dirty bits so data is valid.
    //      2) Don't infinitely recurse.
    // Second goal is efficiency:
    //      1) Try to do device-to-device if possible
    //      2) Minimum number of copies and minimum amount of copying otherwise.
    //      2a) e.g. for a device to different device buffer copy call where the copy must
    //          go through host memory, the src buffer may be left in device dirty state
    //          with the data copied through the destination host buffer to reduce the size
    //          of the copy.
    // The device specific runtime routine may return an error for the
    // device to device case with separate devices. This code will attempt
    // to decompose the call via bouncing through host memory.
    //
    // At present some cases, such as different devices where there is
    // no host buffer, will return an error. Some of these could be
    // handled by allocating temporary host memory.
    //
    // It is assumed that if two device runtimes have copy compatible buffers
    // both will handle a copy between their types of buffers.

    // Give more descriptive names to conditions.
    const bool from_device_valid = (src->device != 0) &&
                                   (src->host == NULL || !src->host_dirty());
    const bool to_device = dst_device_interface != NULL;
    const bool to_host = dst_device_interface == NULL;
    const bool from_host_exists = src->host != NULL;
    const bool from_host_valid = from_host_exists &&
                                 (!src->device_dirty() || (src->device_interface == NULL));
    const bool to_host_exists = dst->host != NULL;

    if (to_host && !to_host_exists) {
        return halide_error_code_host_is_null;
    }

    // If a device to device copy is requested, try to do it directly.
    // Seed err with the "not handled" code so the fallback below runs
    // when no direct attempt was made.
    err = halide_error_code_incompatible_device_interface;
    if (from_device_valid && to_device) {
        debug(user_context) << "halide_buffer_copy_already_locked: device to device case.\n";
        err = dst_device_interface->impl->buffer_copy(user_context, src, dst_device_interface, dst);
    }

    if (err == halide_error_code_incompatible_device_interface) {
        // Return an error for a case that cannot make progress without a temporary allocation.
        // TODO: go ahead and do the temp allocation.
        if (!from_host_exists && !to_host_exists) {
            debug(user_context) << "halide_buffer_copy_already_locked: failing due to need for temp buffer.\n";
            return halide_error_code_incompatible_device_interface;
        }

        if (to_host && from_host_valid) {
            // Pure host-to-host copy.
            device_copy c = make_buffer_copy(src, true, dst, true);
            copy_memory(c, user_context);
            err = 0;
        } else if (to_host) {
            debug(user_context) << "halide_buffer_copy_already_locked: to host case.\n";
            err = src->device_interface->impl->buffer_copy(user_context, src, NULL, dst);
            // Return on success or an error indicating something other
            // than not handling this case went wrong.
            if (err == halide_error_code_incompatible_device_interface) {
                // Bounce through src's host memory, then retry as a
                // host-to-host copy (recursion terminates because src is
                // no longer device-dirty).
                err = copy_to_host_already_locked(user_context, src);
                if (!err) {
                    err = halide_buffer_copy_already_locked(user_context, src, NULL, dst);
                }
            }
        } else {
            if (from_device_valid && to_host_exists) {
                debug(user_context) << "halide_buffer_copy_already_locked: from_device_valid && to_host_exists case.\n";
                // dev -> dev via dst host memory
                debug(user_context) << " device -> device via dst host memory\n";
                err = src->device_interface->impl->buffer_copy(user_context, src, NULL, dst);
                if (err == 0) {
                    dst->set_host_dirty(true);
                    err = copy_to_device_already_locked(user_context, dst, dst_device_interface);
                }
            } else {
                debug(user_context) << "halide_buffer_copy_already_locked: dev -> dev via src host memory.\n";
                // dev -> dev via src host memory.
                err = copy_to_host_already_locked(user_context, src);
                if (err == 0) {
                    err = dst_device_interface->impl->buffer_copy(user_context, src, dst_device_interface, dst);
                }
            }
        }
    }

    if (err != 0) {
        debug(user_context) << "halide_buffer_copy_already_locked: got error " << err << ".\n";
    }
    // On success, whichever side of dst received the data is now the
    // fresh copy (self-copies leave the flags untouched).
    if (err == 0 && dst != src) {
        if (dst_device_interface) {
            debug(user_context) << "halide_buffer_copy_already_locked: setting device dirty.\n";
            dst->set_host_dirty(false);
            dst->set_device_dirty(true);
        } else {
            debug(user_context) << "halide_buffer_copy_already_locked: setting host dirty.\n";
            dst->set_host_dirty(true);
            dst->set_device_dirty(false);
        }
    }

    return err;
}
589 
halide_buffer_copy(void * user_context,struct halide_buffer_t * src,const struct halide_device_interface_t * dst_device_interface,struct halide_buffer_t * dst)590 WEAK int halide_buffer_copy(void *user_context, struct halide_buffer_t *src,
591                             const struct halide_device_interface_t *dst_device_interface,
592                             struct halide_buffer_t *dst) {
593     debug(user_context) << "halide_buffer_copy:\n"
594                         << " src " << *src << "\n"
595                         << " interface " << dst_device_interface << "\n"
596                         << " dst " << *dst << "\n";
597 
598     ScopedMutexLock lock(&device_copy_mutex);
599 
600     if (dst_device_interface) {
601         dst_device_interface->impl->use_module();
602     }
603     if (src->device_interface) {
604         src->device_interface->impl->use_module();
605     }
606 
607     int err = halide_buffer_copy_already_locked(user_context, src, dst_device_interface, dst);
608 
609     if (dst_device_interface) {
610         dst_device_interface->impl->release_module();
611     }
612     if (src->device_interface) {
613         src->device_interface->impl->release_module();
614     }
615 
616     return err;
617 }
618 
/** Default device_crop implementation: cropping is unsupported by this
 * backend, so report an error. */
WEAK int halide_default_device_crop(void *user_context,
                                    const struct halide_buffer_t *src,
                                    struct halide_buffer_t *dst) {
    halide_error(user_context, "device_interface does not support cropping\n");
    return halide_error_code_device_crop_unsupported;
}
625 
/** Default device_slice implementation: slicing is unsupported by this
 * backend, so report an error (shares the crop error code). */
WEAK int halide_default_device_slice(void *user_context,
                                     const struct halide_buffer_t *src,
                                     int slice_dim, int slice_pos,
                                     struct halide_buffer_t *dst) {
    halide_error(user_context, "device_interface does not support slicing\n");
    return halide_error_code_device_crop_unsupported;
}
633 
/** Give dst a device allocation that refers to a crop of src's device
 * allocation. src and dst must have identical dimensionality and dst
 * must not already have a device allocation. A src with no device
 * allocation is a no-op success. */
WEAK int halide_device_crop(void *user_context,
                            const struct halide_buffer_t *src,
                            struct halide_buffer_t *dst) {
    ScopedMutexLock lock(&device_copy_mutex);

    if (!src->device) {
        // Nothing on the device to crop.
        return 0;
    }

    if (dst->device) {
        halide_error(user_context, "destination buffer already has a device allocation\n");
        return halide_error_code_device_crop_failed;
    }

    if (src->dimensions != dst->dimensions) {
        halide_error(user_context, "src and dst must have identical dimensionality\n");
        return halide_error_code_device_crop_failed;
    }

    // NOTE(review): deliberately no matching release_module here — the
    // reference appears to be held on behalf of the cropped buffer and
    // dropped in halide_device_release_crop. TODO confirm.
    src->device_interface->impl->use_module();
    int err = src->device_interface->impl->device_crop(user_context, src, dst);

    debug(user_context) << "halide_device_crop "
                        << "\n"
                        << " src: " << *src << "\n"
                        << " dst: " << *dst << "\n";

    return err;
}
663 
halide_device_slice(void * user_context,const struct halide_buffer_t * src,int slice_dim,int slice_pos,struct halide_buffer_t * dst)664 WEAK int halide_device_slice(void *user_context,
665                              const struct halide_buffer_t *src,
666                              int slice_dim, int slice_pos,
667                              struct halide_buffer_t *dst) {
668     ScopedMutexLock lock(&device_copy_mutex);
669 
670     if (!src->device) {
671         return 0;
672     }
673 
674     if (dst->device) {
675         halide_error(user_context, "destination buffer already has a device allocation\n");
676         return halide_error_code_device_crop_failed;
677     }
678 
679     if (src->dimensions != dst->dimensions + 1) {
680         halide_error(user_context, "dst must have exactly one fewer dimension than src\n");
681         return halide_error_code_device_crop_failed;
682     }
683 
684     src->device_interface->impl->use_module();
685     int err = src->device_interface->impl->device_slice(user_context, src, slice_dim, slice_pos, dst);
686 
687     debug(user_context) << "halide_device_crop "
688                         << "\n"
689                         << " src: " << *src << "\n"
690                         << " dst: " << *dst << "\n";
691 
692     return err;
693 }
694 
halide_default_device_release_crop(void * user_context,struct halide_buffer_t * buf)695 WEAK int halide_default_device_release_crop(void *user_context,
696                                             struct halide_buffer_t *buf) {
697     if (!buf->device) {
698         return 0;
699     }
700     halide_error(user_context, "device_interface does not support cropping\n");
701     return halide_error_code_device_crop_unsupported;
702 }
703 
halide_device_release_crop(void * user_context,struct halide_buffer_t * buf)704 WEAK int halide_device_release_crop(void *user_context,
705                                     struct halide_buffer_t *buf) {
706     if (buf->device) {
707         ScopedMutexLock lock(&device_copy_mutex);
708         const struct halide_device_interface_t *interface = buf->device_interface;
709         int result = interface->impl->device_release_crop(user_context, buf);
710         buf->device = 0;
711         interface->impl->release_module();
712         buf->device_interface = NULL;
713         return result;
714     }
715     return 0;
716 }
717 
718 }  // extern "C" linkage
719