1 #include "HalideRuntimeHexagonHost.h"
2 #include "device_buffer_utils.h"
3 #include "device_interface.h"
4 #include "printer.h"
5 #include "runtime_internal.h"
6 #include "scoped_mutex_lock.h"
7 
8 namespace Halide {
9 namespace Runtime {
10 namespace Internal {
11 namespace Hexagon {
12 
// Bookkeeping record for one device allocation. A pointer to one of these,
// reinterpreted as an integer, is what halide_hexagon_wrap_device_handle
// stores in halide_buffer_t::device.
struct ion_device_handle {
    void *buffer;  // The allocation that was wrapped.
    size_t size;   // The size passed in when the allocation was wrapped.
};
17 
// Lock held by halide_hexagon_initialize_kernels and
// halide_hexagon_device_release while they use state_list and
// shared_runtime.
WEAK halide_mutex thread_lock = {{0}};

// The device interface that buffers wrapped by this runtime are tagged with.
extern WEAK halide_device_interface_t hexagon_device_interface;
21 
// Define dynamic version of hexagon_remote/halide_hexagon_remote.h
// A (pointer, length) pair; this is the form in which every argument is
// handed to remote_run.
typedef struct _remote_buffer__seq_octet _remote_buffer__seq_octet;
typedef _remote_buffer__seq_octet remote_buffer;
struct _remote_buffer__seq_octet {
    unsigned char *data;
    int dataLen;
};

// Signatures of the entry points that init_hexagon_runtime looks up in the
// host library. Argument meanings below are taken from the call sites in
// this file.
// (soname, soname length, code, code size, out: module handle)
typedef int (*remote_load_library_fn)(const char *, int, const unsigned char *, int, halide_hexagon_handle_t *);
// (module, name, name length, out: symbol handle)
typedef int (*remote_get_symbol_fn)(halide_hexagon_handle_t, const char *, int, halide_hexagon_handle_t *);
// (module, function, then three (array, count) pairs passed in the order
// input buffers, output buffers, input scalars)
typedef int (*remote_run_fn)(halide_hexagon_handle_t, int,
                             const remote_buffer *, int, const remote_buffer *, int,
                             remote_buffer *, int);
// (module or shared runtime handle)
typedef int (*remote_release_library_fn)(halide_hexagon_handle_t);
// (message buffer, buffer size, out: amount read)
typedef int (*remote_poll_log_fn)(char *, int, int *);
// (out: func, out: threads)
typedef void (*remote_poll_profiler_state_fn)(int *, int *);
// (the profiler's current_func)
typedef int (*remote_profiler_set_current_func_fn)(int);
// The power/performance/priority entry points below have no call site in
// the part of the file shown here.
typedef int (*remote_power_fn)();
typedef int (*remote_power_mode_fn)(int);
typedef int (*remote_power_perf_fn)(int, unsigned int, unsigned int, int, unsigned int, unsigned int, int, int);
typedef int (*remote_thread_priority_fn)(int);

// Host-side allocator entry points, also looked up in the host library.
typedef void (*host_malloc_init_fn)();
typedef void *(*host_malloc_fn)(size_t);
typedef void (*host_free_fn)(void *);
47 
// Entry points filled in by init_hexagon_runtime. They are NULL until then.
// load_library, get_symbol, run and release_library are looked up as
// required symbols; the remaining remote_* pointers are optional and may
// stay NULL.
WEAK remote_load_library_fn remote_load_library = NULL;
WEAK remote_get_symbol_fn remote_get_symbol = NULL;
WEAK remote_run_fn remote_run = NULL;
WEAK remote_release_library_fn remote_release_library = NULL;
WEAK remote_poll_log_fn remote_poll_log = NULL;
WEAK remote_poll_profiler_state_fn remote_poll_profiler_state = NULL;
WEAK remote_profiler_set_current_func_fn remote_profiler_set_current_func = NULL;
WEAK remote_power_fn remote_power_hvx_on = NULL;
WEAK remote_power_fn remote_power_hvx_off = NULL;
WEAK remote_power_perf_fn remote_set_performance = NULL;
WEAK remote_power_mode_fn remote_set_performance_mode = NULL;
WEAK remote_thread_priority_fn remote_set_thread_priority = NULL;

// Host allocator entry points; all four are looked up as required symbols.
WEAK host_malloc_init_fn host_malloc_init = NULL;
WEAK host_malloc_init_fn host_malloc_deinit = NULL;
WEAK host_malloc_fn host_malloc = NULL;
WEAK host_free_fn host_free = NULL;
65 
66 // This checks if there are any log messages available on the remote
67 // side. It should be called after every remote call.
poll_log(void * user_context)68 WEAK void poll_log(void *user_context) {
69     if (!remote_poll_log) return;
70 
71     while (true) {
72         char message[1024];
73         int read = 0;
74         int result = remote_poll_log(&message[0], sizeof(message), &read);
75         if (result != 0) {
76             // Don't make this an error, otherwise we might obscure
77             // more information about errors that would come later.
78             print(user_context) << "Hexagon: remote_poll_log failed " << result << "\n";
79             return;
80         }
81 
82         if (read > 0) {
83             halide_print(user_context, message);
84         } else {
85             break;
86         }
87     }
88 }
89 
get_remote_profiler_state(int * func,int * threads)90 WEAK void get_remote_profiler_state(int *func, int *threads) {
91     if (!remote_poll_profiler_state) {
92         // This should only have been called if there's a remote profiler func installed.
93         error(NULL) << "Hexagon: remote_poll_profiler_func not found\n";
94     }
95 
96     remote_poll_profiler_state(func, threads);
97 }
98 
99 template<typename T>
get_symbol(void * user_context,void * host_lib,const char * name,T & sym,bool required=true)100 ALWAYS_INLINE void get_symbol(void *user_context, void *host_lib, const char *name, T &sym, bool required = true) {
101     debug(user_context) << "    halide_get_library_symbol('" << name << "') -> \n";
102     sym = (T)halide_get_library_symbol(host_lib, name);
103     debug(user_context) << "        " << (void *)sym << "\n";
104     if (!sym && required) {
105         error(user_context) << "Required Hexagon runtime symbol '" << name << "' not found.\n";
106     }
107 }
108 
109 // Load the hexagon remote runtime.
init_hexagon_runtime(void * user_context)110 WEAK int init_hexagon_runtime(void *user_context) {
111     if (remote_load_library && remote_run && remote_release_library) {
112         // Already loaded.
113         return 0;
114     }
115 
116     // The "support library" for Hexagon is essentially a way to delegate Hexagon
117     // code execution based on the runtime; devices with Hexagon hardware will
118     // simply provide conduits for execution on that hardware, while test/desktop/etc
119     // environments can instead connect a simulator via the API.
120     // Load the .so for Linux or Android, and if that fails try the .dll
121     // as we may be running the windows hosted simulator.
122     void *host_lib = halide_load_library("libhalide_hexagon_host.so");
123     if (!host_lib) {
124         host_lib = halide_load_library("libhalide_hexagon_host.dll");
125     }
126 
127     debug(user_context) << "Hexagon: init_hexagon_runtime (user_context: " << user_context << ")\n";
128 
129     // Get the symbols we need from the library.
130     get_symbol(user_context, host_lib, "halide_hexagon_remote_load_library", remote_load_library);
131     if (!remote_load_library) return -1;
132     get_symbol(user_context, host_lib, "halide_hexagon_remote_get_symbol_v4", remote_get_symbol);
133     if (!remote_get_symbol) return -1;
134     get_symbol(user_context, host_lib, "halide_hexagon_remote_run", remote_run);
135     if (!remote_run) return -1;
136     get_symbol(user_context, host_lib, "halide_hexagon_remote_release_library", remote_release_library);
137     if (!remote_release_library) return -1;
138 
139     get_symbol(user_context, host_lib, "halide_hexagon_host_malloc_init", host_malloc_init);
140     if (!host_malloc_init) return -1;
141     get_symbol(user_context, host_lib, "halide_hexagon_host_malloc_deinit", host_malloc_deinit);
142     if (!host_malloc_deinit) return -1;
143     get_symbol(user_context, host_lib, "halide_hexagon_host_malloc", host_malloc);
144     if (!host_malloc) return -1;
145     get_symbol(user_context, host_lib, "halide_hexagon_host_free", host_free);
146     if (!host_free) return -1;
147 
148     // These symbols are optional.
149     get_symbol(user_context, host_lib, "halide_hexagon_remote_poll_log", remote_poll_log, /* required */ false);
150     get_symbol(user_context, host_lib, "halide_hexagon_remote_poll_profiler_state", remote_poll_profiler_state, /* required */ false);
151     get_symbol(user_context, host_lib, "halide_hexagon_remote_profiler_set_current_func", remote_profiler_set_current_func, /* required */ false);
152 
153     // If these are unavailable, then the runtime always powers HVX on and so these are not necessary.
154     get_symbol(user_context, host_lib, "halide_hexagon_remote_power_hvx_on", remote_power_hvx_on, /* required */ false);
155     get_symbol(user_context, host_lib, "halide_hexagon_remote_power_hvx_off", remote_power_hvx_off, /* required */ false);
156     get_symbol(user_context, host_lib, "halide_hexagon_remote_set_performance", remote_set_performance, /* required */ false);
157     get_symbol(user_context, host_lib, "halide_hexagon_remote_set_performance_mode", remote_set_performance_mode, /* required */ false);
158     get_symbol(user_context, host_lib, "halide_hexagon_remote_set_thread_priority", remote_set_thread_priority, /* required */ false);
159 
160     host_malloc_init();
161 
162     return 0;
163 }
164 
// Structure to hold the state of a module attached to the context.
// Also used as a linked-list to keep track of all the different
// modules that are attached to a context in order to release them all
// when the context is released.
struct module_state {
    halide_hexagon_handle_t module;  // Remote module handle; 0 when no module is loaded.
    module_state *next;              // Next node in state_list.
};
// Head of the list of module_state nodes; new nodes are pushed at the front
// by halide_hexagon_initialize_kernels.
WEAK module_state *state_list = NULL;
// Handle of the shared runtime library on the remote side; 0 when not loaded.
WEAK halide_hexagon_handle_t shared_runtime = 0;
175 
176 #ifdef DEBUG_RUNTIME
177 
178 // In debug builds, we write shared objects to the current directory (without
179 // failing on errors).
write_shared_object(void * user_context,const char * path,const uint8_t * code,uint64_t code_size)180 WEAK void write_shared_object(void *user_context, const char *path,
181                               const uint8_t *code, uint64_t code_size) {
182     void *f = fopen(path, "wb");
183     if (!f) {
184         debug(user_context) << "    failed to write shared object to '" << path << "'\n";
185         return;
186     }
187     size_t written = fwrite(code, 1, code_size, f);
188     if (written != code_size) {
189         debug(user_context) << "    bad write of shared object to '" << path << "'\n";
190     }
191     fclose(f);
192 }
193 
194 #endif
195 
196 }  // namespace Hexagon
197 }  // namespace Internal
198 }  // namespace Runtime
199 }  // namespace Halide
200 
201 using namespace Halide::Runtime::Internal;
202 using namespace Halide::Runtime::Internal::Hexagon;
203 
204 extern "C" {
205 
halide_is_hexagon_available(void * user_context)206 WEAK bool halide_is_hexagon_available(void *user_context) {
207     int result = init_hexagon_runtime(user_context);
208     return result == 0;
209 }
210 
halide_hexagon_initialize_kernels(void * user_context,void ** state_ptr,const uint8_t * code,uint64_t code_size,const uint8_t * runtime,uint64_t runtime_size)211 WEAK int halide_hexagon_initialize_kernels(void *user_context, void **state_ptr,
212                                            const uint8_t *code, uint64_t code_size,
213                                            const uint8_t *runtime, uint64_t runtime_size) {
214     int result = init_hexagon_runtime(user_context);
215     if (result != 0) return result;
216     debug(user_context) << "Hexagon: halide_hexagon_initialize_kernels (user_context: " << user_context
217                         << ", state_ptr: " << state_ptr
218                         << ", *state_ptr: " << *state_ptr
219                         << ", code: " << code
220                         << ", code_size: " << (int)code_size << ")\n"
221                         << ", code: " << runtime
222                         << ", code_size: " << (int)runtime_size << ")\n";
223     halide_assert(user_context, state_ptr != NULL);
224 
225 #ifdef DEBUG_RUNTIME
226     uint64_t t_before = halide_current_time_ns(user_context);
227 #endif
228 
229     // Create the state object if necessary. This only happens once,
230     // regardless of how many times halide_hexagon_initialize_kernels
231     // or halide_hexagon_device_release is called.
232     // halide_hexagon_device_release traverses this list and releases
233     // the module objects, but it does not modify the list nodes
234     // created/inserted here.
235     ScopedMutexLock lock(&thread_lock);
236 
237     // Initialize the runtime, if necessary.
238     if (!shared_runtime) {
239         debug(user_context) << "    Initializing shared runtime\n";
240         const char soname[] = "libhalide_shared_runtime.so";
241 #ifdef DEBUG_RUNTIME
242         debug(user_context) << "    Writing shared object '" << soname << "'\n";
243         write_shared_object(user_context, soname, runtime, runtime_size);
244 #endif
245         debug(user_context) << "    halide_remote_load_library(" << soname << ") -> ";
246         result = remote_load_library(soname, sizeof(soname), runtime, runtime_size, &shared_runtime);
247         poll_log(user_context);
248         if (result == 0) {
249             debug(user_context) << "        " << (void *)(size_t)shared_runtime << "\n";
250             halide_assert(user_context, shared_runtime != 0);
251         } else {
252             debug(user_context) << "        " << result << "\n";
253             error(user_context) << "Initialization of Hexagon kernels failed\n";
254             shared_runtime = 0;
255         }
256     } else {
257         debug(user_context) << "    re-using existing shared runtime " << (void *)(size_t)shared_runtime << "\n";
258     }
259 
260     if (result != 0) {
261         return -1;
262     }
263 
264     module_state **state = (module_state **)state_ptr;
265     if (!(*state)) {
266         debug(user_context) << "    allocating module state -> \n";
267         *state = (module_state *)malloc(sizeof(module_state));
268         debug(user_context) << "        " << *state << "\n";
269         (*state)->module = 0;
270         (*state)->next = state_list;
271         state_list = *state;
272     }
273 
274     // Create the module itself if necessary.
275     if (!(*state)->module) {
276         static int unique_id = 0;
277         stringstream soname(user_context);
278         soname << "libhalide_kernels" << unique_id++ << ".so";
279 #ifdef DEBUG_RUNTIME
280         debug(user_context) << "    Writing shared object '" << soname.str() << "'\n";
281         write_shared_object(user_context, soname.str(), code, code_size);
282 #endif
283         debug(user_context) << "    halide_remote_load_library(" << soname.str() << ") -> ";
284         halide_hexagon_handle_t module = 0;
285         result = remote_load_library(soname.str(), soname.size() + 1, code, code_size, &module);
286         poll_log(user_context);
287         if (result == 0) {
288             debug(user_context) << "        " << (void *)(size_t)module << "\n";
289             (*state)->module = module;
290         } else {
291             debug(user_context) << "        " << result << "\n";
292             error(user_context) << "Initialization of Hexagon kernels failed\n";
293         }
294     } else {
295         debug(user_context) << "    re-using existing module " << (void *)(size_t)(*state)->module << "\n";
296     }
297 
298 #ifdef DEBUG_RUNTIME
299     uint64_t t_after = halide_current_time_ns(user_context);
300     debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
301 #endif
302 
303     return result != 0 ? -1 : 0;
304 }
305 
306 namespace {
307 
308 // Prepare an array of remote_buffer arguments, mapping buffers if
309 // necessary. Only arguments with flags&flag_mask == flag_value are
310 // added to the mapped_args array. Returns the number of arguments
311 // mapped, or a negative number on error.
map_arguments(void * user_context,int arg_count,uint64_t arg_sizes[],void * args[],int arg_flags[],int flag_mask,int flag_value,remote_buffer * mapped_args)312 WEAK int map_arguments(void *user_context, int arg_count,
313                        uint64_t arg_sizes[], void *args[], int arg_flags[], int flag_mask, int flag_value,
314                        remote_buffer *mapped_args) {
315     int mapped_count = 0;
316     for (int i = 0; i < arg_count; i++) {
317         if ((arg_flags[i] & flag_mask) != flag_value) continue;
318         remote_buffer &mapped_arg = mapped_args[mapped_count++];
319         if (arg_flags[i] != 0) {
320             // This is the way that HexagonOffload packages arguments for us.
321             struct hexagon_device_pointer {
322                 uint64_t dev;
323                 uint8_t *host;
324             };
325             const hexagon_device_pointer *b = (hexagon_device_pointer *)args[i];
326             uint64_t device = b->dev;
327             uint8_t *host = b->host;
328             if (device) {
329                 // This argument has a device handle.
330                 ion_device_handle *ion_handle = reinterpret<ion_device_handle *>(device);
331                 debug(user_context) << i << ", " << device << "\n";
332                 mapped_arg.data = reinterpret_cast<uint8_t *>(ion_handle->buffer);
333                 mapped_arg.dataLen = ion_handle->size;
334             } else {
335                 // This is just a host buffer, and the size is passed in as the arg size.
336                 mapped_arg.data = host;
337                 mapped_arg.dataLen = arg_sizes[i];
338             }
339         } else {
340             // This is a scalar, just put the pointer/size in the result.
341             mapped_arg.data = (uint8_t *)args[i];
342             mapped_arg.dataLen = arg_sizes[i];
343         }
344     }
345     return mapped_count;
346 }
347 
348 }  // namespace
349 
halide_hexagon_run(void * user_context,void * state_ptr,const char * name,halide_hexagon_handle_t * function,uint64_t arg_sizes[],void * args[],int arg_flags[])350 WEAK int halide_hexagon_run(void *user_context,
351                             void *state_ptr,
352                             const char *name,
353                             halide_hexagon_handle_t *function,
354                             uint64_t arg_sizes[],
355                             void *args[],
356                             int arg_flags[]) {
357     halide_assert(user_context, state_ptr != NULL);
358     halide_assert(user_context, function != NULL);
359     int result = init_hexagon_runtime(user_context);
360     if (result != 0) return result;
361 
362     halide_hexagon_handle_t module = state_ptr ? ((module_state *)state_ptr)->module : 0;
363     debug(user_context) << "Hexagon: halide_hexagon_run ("
364                         << "user_context: " << user_context << ", "
365                         << "state_ptr: " << state_ptr << " (" << module << "), "
366                         << "name: " << name << ", "
367                         << "function: " << function << " (" << *function << "))\n";
368 
369     // If we haven't gotten the symbol for this function, do so now.
370     if (*function == 0) {
371         debug(user_context) << "    halide_hexagon_remote_get_symbol " << name << " -> ";
372         halide_hexagon_handle_t sym = 0;
373         int result = remote_get_symbol(module, name, strlen(name) + 1, &sym);
374         *function = result == 0 ? sym : 0;
375         poll_log(user_context);
376         debug(user_context) << "        " << *function << "\n";
377         if (*function == 0) {
378             error(user_context) << "Failed to find function " << name << " in module.\n";
379             return -1;
380         }
381     }
382 
383     // Allocate some remote_buffer objects on the stack.
384     int arg_count = 0;
385     while (arg_sizes[arg_count] > 0)
386         arg_count++;
387     remote_buffer *mapped_buffers =
388         (remote_buffer *)__builtin_alloca(arg_count * sizeof(remote_buffer));
389 
390     // Map the arguments.
391     // First grab the input buffers (bit 0 of flags is set).
392     remote_buffer *input_buffers = mapped_buffers;
393     int input_buffer_count = map_arguments(user_context, arg_count, arg_sizes, args, arg_flags, 0x3, 0x1,
394                                            input_buffers);
395     if (input_buffer_count < 0) return input_buffer_count;
396 
397     // Then the output buffers (bit 1 of flags is set).
398     remote_buffer *output_buffers = input_buffers + input_buffer_count;
399     int output_buffer_count = map_arguments(user_context, arg_count, arg_sizes, args, arg_flags, 0x2, 0x2,
400                                             output_buffers);
401     if (output_buffer_count < 0) return output_buffer_count;
402 
403     // And the input scalars (neither bits 0 or 1 of flags is set).
404     remote_buffer *input_scalars = output_buffers + output_buffer_count;
405     int input_scalar_count = map_arguments(user_context, arg_count, arg_sizes, args, arg_flags, 0x3, 0x0,
406                                            input_scalars);
407     if (input_scalar_count < 0) return input_scalar_count;
408 
409 #ifdef DEBUG_RUNTIME
410     uint64_t t_before = halide_current_time_ns(user_context);
411 #endif
412 
413     // If remote profiling is supported, tell the profiler to call
414     // get_remote_profiler_func to retrieve the current
415     // func. Otherwise leave it alone - the cost of remote running
416     // will be billed to the calling Func.
417     if (remote_poll_profiler_state) {
418         halide_profiler_get_state()->get_remote_profiler_state = get_remote_profiler_state;
419         if (remote_profiler_set_current_func) {
420             remote_profiler_set_current_func(halide_profiler_get_state()->current_func);
421         }
422     }
423 
424     // Call the pipeline on the device side.
425     debug(user_context) << "    halide_hexagon_remote_run -> ";
426     result = remote_run(module, *function,
427                         input_buffers, input_buffer_count,
428                         output_buffers, output_buffer_count,
429                         input_scalars, input_scalar_count);
430     poll_log(user_context);
431     debug(user_context) << "        " << result << "\n";
432     if (result != 0) {
433         error(user_context) << "Hexagon pipeline failed.\n";
434         return result;
435     }
436 
437     halide_profiler_get_state()->get_remote_profiler_state = NULL;
438 
439 #ifdef DEBUG_RUNTIME
440     uint64_t t_after = halide_current_time_ns(user_context);
441     debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
442 #endif
443 
444     return result != 0 ? -1 : 0;
445 }
446 
halide_hexagon_device_release(void * user_context)447 WEAK int halide_hexagon_device_release(void *user_context) {
448     debug(user_context)
449         << "Hexagon: halide_hexagon_device_release (user_context: " << user_context << ")\n";
450 
451     ScopedMutexLock lock(&thread_lock);
452 
453     // Release all of the remote side modules.
454     module_state *state = state_list;
455     while (state) {
456         if (state->module) {
457             debug(user_context) << "    halide_remote_release_library " << state
458                                 << " (" << state->module << ") -> ";
459             int result = remote_release_library(state->module);
460             poll_log(user_context);
461             debug(user_context) << "        " << result << "\n";
462             state->module = 0;
463         }
464         state = state->next;
465     }
466     state_list = NULL;
467 
468     if (shared_runtime) {
469         debug(user_context) << "    releasing shared runtime\n";
470         debug(user_context) << "    halide_remote_release_library " << shared_runtime << " -> ";
471         int result = remote_release_library(shared_runtime);
472         poll_log(user_context);
473         debug(user_context) << "        " << result << "\n";
474         shared_runtime = 0;
475     }
476 
477     return 0;
478 }
479 
// When allocations for Hexagon are at least as large as this
// threshold, use an ION allocation (to get zero copy). If the
// allocation is smaller, use a standard allocation instead.  This is
// done because allocating an entire page for a small allocation is
// wasteful, and the copy is not significant.  Additionally, the
// FastRPC interface can probably do a better job with many small
// arguments than simply mapping the pages.
// halide_hexagon_device_free compares the recorded allocation size against
// the same threshold to pick the matching free function.
static const int min_ion_allocation_size = 4096;
488 
halide_hexagon_device_malloc(void * user_context,halide_buffer_t * buf)489 WEAK int halide_hexagon_device_malloc(void *user_context, halide_buffer_t *buf) {
490     int result = init_hexagon_runtime(user_context);
491     if (result != 0) return result;
492 
493     debug(user_context)
494         << "Hexagon: halide_hexagon_device_malloc (user_context: " << user_context
495         << ", buf: " << buf << ")\n";
496 
497     if (buf->device) {
498         // This buffer already has a device allocation
499         return 0;
500     }
501 
502     size_t size = buf->size_in_bytes();
503     halide_assert(user_context, size != 0);
504 
505     // Hexagon code generation generates clamped ramp loads in a way
506     // that requires up to an extra vector beyond the end of the
507     // buffer to be legal to access.
508     size += 128;
509 
510     for (int i = 0; i < buf->dimensions; i++) {
511         halide_assert(user_context, buf->dim[i].stride >= 0);
512     }
513 
514     debug(user_context) << "    allocating buffer of " << (uint64_t)size << " bytes\n";
515 
516 #ifdef DEBUG_RUNTIME
517     uint64_t t_before = halide_current_time_ns(user_context);
518 #endif
519 
520     void *ion;
521     if (size >= min_ion_allocation_size) {
522         debug(user_context) << "    host_malloc len=" << (uint64_t)size << " -> ";
523         ion = host_malloc(size);
524         debug(user_context) << "        " << ion << "\n";
525         if (!ion) {
526             error(user_context) << "host_malloc failed\n";
527             return -1;
528         }
529     } else {
530         debug(user_context) << "    halide_malloc size=" << (uint64_t)size << " -> ";
531         ion = halide_malloc(user_context, size);
532         debug(user_context) << "        " << ion << "\n";
533         if (!ion) {
534             error(user_context) << "halide_malloc failed\n";
535             return -1;
536         }
537     }
538 
539     int err = halide_hexagon_wrap_device_handle(user_context, buf, ion, size);
540     if (err != 0) {
541         if (size >= min_ion_allocation_size) {
542             host_free(ion);
543         } else {
544             halide_free(user_context, ion);
545         }
546         return err;
547     }
548 
549     if (!buf->host) {
550         // If the host pointer has also not been allocated yet, set it to
551         // the ion buffer. This buffer will be zero copy.
552         buf->host = (uint8_t *)ion;
553         debug(user_context) << "    host <- " << buf->host << "\n";
554     }
555 
556 #ifdef DEBUG_RUNTIME
557     uint64_t t_after = halide_current_time_ns(user_context);
558     debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
559 #endif
560 
561     return 0;
562 }
563 
halide_hexagon_device_free(void * user_context,halide_buffer_t * buf)564 WEAK int halide_hexagon_device_free(void *user_context, halide_buffer_t *buf) {
565     debug(user_context)
566         << "Hexagon: halide_hexagon_device_free (user_context: " << user_context
567         << ", buf: " << buf << ")\n";
568 
569 #ifdef DEBUG_RUNTIME
570     uint64_t t_before = halide_current_time_ns(user_context);
571 #endif
572 
573     uint64_t size = halide_hexagon_get_device_size(user_context, buf);
574     void *ion = halide_hexagon_get_device_handle(user_context, buf);
575     halide_hexagon_detach_device_handle(user_context, buf);
576     if (size >= min_ion_allocation_size) {
577         debug(user_context) << "    host_free ion=" << ion << "\n";
578         host_free(ion);
579     } else {
580         debug(user_context) << "    halide_free ion=" << ion << "\n";
581         halide_free(user_context, ion);
582     }
583 
584     if (buf->host == ion) {
585         // If we also set the host pointer, reset it.
586         buf->host = NULL;
587         debug(user_context) << "    host <- 0x0\n";
588     }
589 
590 #ifdef DEBUG_RUNTIME
591     uint64_t t_after = halide_current_time_ns(user_context);
592     debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
593 #endif
594 
595     // This is to match what the default implementation of halide_device_free does.
596     buf->set_device_dirty(false);
597     return 0;
598 }
599 
halide_hexagon_copy_to_device(void * user_context,halide_buffer_t * buf)600 WEAK int halide_hexagon_copy_to_device(void *user_context, halide_buffer_t *buf) {
601     int err = halide_hexagon_device_malloc(user_context, buf);
602     if (err) {
603         return err;
604     }
605 
606     debug(user_context)
607         << "Hexagon: halide_hexagon_copy_to_device (user_context: " << user_context
608         << ", buf: " << buf << ")\n";
609 
610 #ifdef DEBUG_RUNTIME
611     uint64_t t_before = halide_current_time_ns(user_context);
612 #endif
613 
614     halide_assert(user_context, buf->host && buf->device);
615     device_copy c = make_host_to_device_copy(buf);
616 
617     // Get the descriptor associated with the ion buffer.
618     c.dst = reinterpret<uintptr_t>(halide_hexagon_get_device_handle(user_context, buf));
619     copy_memory(c, user_context);
620 
621 #ifdef DEBUG_RUNTIME
622     uint64_t t_after = halide_current_time_ns(user_context);
623     debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
624 #endif
625 
626     return 0;
627 }
628 
halide_hexagon_copy_to_host(void * user_context,struct halide_buffer_t * buf)629 WEAK int halide_hexagon_copy_to_host(void *user_context, struct halide_buffer_t *buf) {
630     debug(user_context)
631         << "Hexagon: halide_hexagon_copy_to_host (user_context: " << user_context
632         << ", buf: " << buf << ")\n";
633 
634 #ifdef DEBUG_RUNTIME
635     uint64_t t_before = halide_current_time_ns(user_context);
636 #endif
637 
638     halide_assert(user_context, buf->host && buf->device);
639     device_copy c = make_device_to_host_copy(buf);
640 
641     // Get the descriptor associated with the ion buffer.
642     c.src = reinterpret<uintptr_t>(halide_hexagon_get_device_handle(user_context, buf));
643     copy_memory(c, user_context);
644 
645 #ifdef DEBUG_RUNTIME
646     uint64_t t_after = halide_current_time_ns(user_context);
647     debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
648 #endif
649 
650     return 0;
651 }
652 
halide_hexagon_device_sync(void * user_context,struct halide_buffer_t *)653 WEAK int halide_hexagon_device_sync(void *user_context, struct halide_buffer_t *) {
654     debug(user_context)
655         << "Hexagon: halide_hexagon_device_sync (user_context: " << user_context << ")\n";
656     // Nothing to do.
657     return 0;
658 }
659 
halide_hexagon_wrap_device_handle(void * user_context,struct halide_buffer_t * buf,void * ion_buf,uint64_t size)660 WEAK int halide_hexagon_wrap_device_handle(void *user_context, struct halide_buffer_t *buf,
661                                            void *ion_buf, uint64_t size) {
662     halide_assert(user_context, buf->device == 0);
663     if (buf->device != 0) {
664         return -2;
665     }
666 
667     ion_device_handle *handle = (ion_device_handle *)malloc(sizeof(ion_device_handle));
668     if (!handle) {
669         return -1;
670     }
671     handle->buffer = ion_buf;
672     handle->size = size;
673     buf->device_interface = &hexagon_device_interface;
674     buf->device_interface->impl->use_module();
675     buf->device = reinterpret<uint64_t>(handle);
676     return 0;
677 }
678 
halide_hexagon_detach_device_handle(void * user_context,struct halide_buffer_t * buf)679 WEAK int halide_hexagon_detach_device_handle(void *user_context, struct halide_buffer_t *buf) {
680     if (buf->device == 0) {
681         return NULL;
682     }
683     halide_assert(user_context, buf->device_interface == &hexagon_device_interface);
684     ion_device_handle *handle = reinterpret<ion_device_handle *>(buf->device);
685     free(handle);
686 
687     buf->device_interface->impl->release_module();
688     buf->device = 0;
689     buf->device_interface = NULL;
690     return 0;
691 }
692 
halide_hexagon_get_device_handle(void * user_context,struct halide_buffer_t * buf)693 WEAK void *halide_hexagon_get_device_handle(void *user_context, struct halide_buffer_t *buf) {
694     if (buf->device == 0) {
695         return NULL;
696     }
697     halide_assert(user_context, buf->device_interface == &hexagon_device_interface);
698     ion_device_handle *handle = reinterpret<ion_device_handle *>(buf->device);
699     return handle->buffer;
700 }
701 
halide_hexagon_get_device_size(void * user_context,struct halide_buffer_t * buf)702 WEAK uint64_t halide_hexagon_get_device_size(void *user_context, struct halide_buffer_t *buf) {
703     if (buf->device == 0) {
704         return 0;
705     }
706     halide_assert(user_context, buf->device_interface == &hexagon_device_interface);
707     ion_device_handle *handle = reinterpret<ion_device_handle *>(buf->device);
708     return handle->size;
709 }
710 
halide_hexagon_device_and_host_malloc(void * user_context,struct halide_buffer_t * buf)711 WEAK int halide_hexagon_device_and_host_malloc(void *user_context, struct halide_buffer_t *buf) {
712     debug(user_context) << "halide_hexagon_device_and_host_malloc called.\n";
713     int result = halide_hexagon_device_malloc(user_context, buf);
714     if (result == 0) {
715         buf->host = (uint8_t *)halide_hexagon_get_device_handle(user_context, buf);
716     }
717     return result;
718 }
719 
halide_hexagon_device_and_host_free(void * user_context,struct halide_buffer_t * buf)720 WEAK int halide_hexagon_device_and_host_free(void *user_context, struct halide_buffer_t *buf) {
721     debug(user_context) << "halide_hexagon_device_and_host_free called.\n";
722     halide_hexagon_device_free(user_context, buf);
723     buf->host = NULL;
724     return 0;
725 }
726 
halide_hexagon_buffer_copy(void * user_context,struct halide_buffer_t * src,const struct halide_device_interface_t * dst_device_interface,struct halide_buffer_t * dst)727 WEAK int halide_hexagon_buffer_copy(void *user_context, struct halide_buffer_t *src,
728                                     const struct halide_device_interface_t *dst_device_interface,
729                                     struct halide_buffer_t *dst) {
730     // We only handle copies to hexagon buffers or to host
731     halide_assert(user_context, dst_device_interface == NULL ||
732                                     dst_device_interface == &hexagon_device_interface);
733 
734     if ((src->device_dirty() || src->host == NULL) &&
735         src->device_interface != &hexagon_device_interface) {
736         halide_assert(user_context, dst_device_interface == &hexagon_device_interface);
737         // This is handled at the higher level.
738         return halide_error_code_incompatible_device_interface;
739     }
740 
741     bool from_host = (src->device_interface != &hexagon_device_interface) ||
742                      (src->device == 0) ||
743                      (src->host_dirty() && src->host != NULL);
744     bool to_host = !dst_device_interface;
745 
746     halide_assert(user_context, from_host || src->device);
747     halide_assert(user_context, to_host || dst->device);
748 
749 #ifdef DEBUG_RUNTIME
750     uint64_t t_before = halide_current_time_ns(user_context);
751 #endif
752 
753     device_copy c = make_buffer_copy(src, from_host, dst, to_host);
754 
755     int err = 0;
756 
757     // Get the descriptor associated with the ion buffer.
758     if (!from_host) {
759         c.src = reinterpret<uintptr_t>(halide_hexagon_get_device_handle(user_context, src));
760     }
761     if (!to_host) {
762         c.dst = reinterpret<uintptr_t>(halide_hexagon_get_device_handle(user_context, dst));
763     }
764     copy_memory(c, user_context);
765 
766 #ifdef DEBUG_RUNTIME
767     uint64_t t_after = halide_current_time_ns(user_context);
768     debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
769 #endif
770 
771     return err;
772 }
773 
774 namespace {
775 
hexagon_device_crop_from_offset(const struct halide_buffer_t * src,int64_t offset,struct halide_buffer_t * dst)776 WEAK int hexagon_device_crop_from_offset(const struct halide_buffer_t *src, int64_t offset, struct halide_buffer_t *dst) {
777     ion_device_handle *src_handle = (ion_device_handle *)src->device;
778     ion_device_handle *dst_handle = (ion_device_handle *)malloc(sizeof(ion_device_handle));
779     if (!dst_handle) {
780         return halide_error_code_out_of_memory;
781     }
782 
783     dst_handle->buffer = (uint8_t *)src_handle->buffer + offset;
784     dst_handle->size = src_handle->size - offset;
785     dst->device = reinterpret<uint64_t>(dst_handle);
786     dst->device_interface = src->device_interface;
787     dst->set_device_dirty(src->device_dirty());
788     return 0;
789 }
790 
791 }  // namespace
792 
halide_hexagon_device_crop(void * user_context,const struct halide_buffer_t * src,struct halide_buffer_t * dst)793 WEAK int halide_hexagon_device_crop(void *user_context, const struct halide_buffer_t *src,
794                                     struct halide_buffer_t *dst) {
795     debug(user_context) << "halide_hexagon_device_crop called.\n";
796 
797     const int64_t offset = calc_device_crop_byte_offset(src, dst);
798     return hexagon_device_crop_from_offset(src, offset, dst);
799 }
800 
halide_hexagon_device_slice(void * user_context,const struct halide_buffer_t * src,int slice_dim,int slice_pos,struct halide_buffer_t * dst)801 WEAK int halide_hexagon_device_slice(void *user_context, const struct halide_buffer_t *src,
802                                      int slice_dim, int slice_pos, struct halide_buffer_t *dst) {
803     debug(user_context) << "halide_hexagon_device_slice called.\n";
804 
805     const int64_t offset = calc_device_slice_byte_offset(src, slice_dim, slice_pos);
806     return hexagon_device_crop_from_offset(src, offset, dst);
807 }
808 
halide_hexagon_device_release_crop(void * user_context,struct halide_buffer_t * dst)809 WEAK int halide_hexagon_device_release_crop(void *user_context, struct halide_buffer_t *dst) {
810     debug(user_context) << "halide_hexagon_release_crop called\n";
811     free((ion_device_handle *)dst->device);
812     return 0;
813 }
814 
halide_hexagon_power_hvx_on(void * user_context)815 WEAK int halide_hexagon_power_hvx_on(void *user_context) {
816     int result = init_hexagon_runtime(user_context);
817     if (result != 0) return result;
818 
819     debug(user_context) << "halide_hexagon_power_hvx_on\n";
820     if (!remote_power_hvx_on) {
821         // The function is not available in this version of the
822         // runtime, this runtime always powers HVX on.
823         return 0;
824     }
825 
826 #ifdef DEBUG_RUNTIME
827     uint64_t t_before = halide_current_time_ns(user_context);
828 #endif
829 
830     debug(user_context) << "    remote_power_hvx_on -> ";
831     result = remote_power_hvx_on();
832     debug(user_context) << "        " << result << "\n";
833     if (result != 0) {
834         error(user_context) << "remote_power_hvx_on failed.\n";
835         return result;
836     }
837 
838 #ifdef DEBUG_RUNTIME
839     uint64_t t_after = halide_current_time_ns(user_context);
840     debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
841 #endif
842 
843     return 0;
844 }
845 
halide_hexagon_power_hvx_off(void * user_context)846 WEAK int halide_hexagon_power_hvx_off(void *user_context) {
847     int result = init_hexagon_runtime(user_context);
848     if (result != 0) return result;
849 
850     debug(user_context) << "halide_hexagon_power_hvx_off\n";
851     if (!remote_power_hvx_off) {
852         // The function is not available in this version of the
853         // runtime, this runtime always powers HVX on.
854         return 0;
855     }
856 
857 #ifdef DEBUG_RUNTIME
858     uint64_t t_before = halide_current_time_ns(user_context);
859 #endif
860 
861     debug(user_context) << "    remote_power_hvx_off -> ";
862     result = remote_power_hvx_off();
863     debug(user_context) << "        " << result << "\n";
864     if (result != 0) {
865         error(user_context) << "remote_power_hvx_off failed.\n";
866         return result;
867     }
868 
869 #ifdef DEBUG_RUNTIME
870     uint64_t t_after = halide_current_time_ns(user_context);
871     debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
872 #endif
873 
874     return 0;
875 }
876 
halide_hexagon_power_hvx_off_as_destructor(void * user_context,void *)877 WEAK void halide_hexagon_power_hvx_off_as_destructor(void *user_context, void * /* obj */) {
878     halide_hexagon_power_hvx_off(user_context);
879 }
880 
halide_hexagon_set_performance_mode(void * user_context,halide_hexagon_power_mode_t mode)881 WEAK int halide_hexagon_set_performance_mode(void *user_context, halide_hexagon_power_mode_t mode) {
882     int result = init_hexagon_runtime(user_context);
883     if (result != 0) return result;
884 
885     debug(user_context) << "halide_hexagon_set_performance_mode\n";
886     if (!remote_set_performance_mode) {
887         // This runtime doesn't support changing the performance target.
888         return 0;
889     }
890 
891     debug(user_context) << "    remote_set_performance_mode -> ";
892     result = remote_set_performance_mode(mode);
893     debug(user_context) << "        " << result << "\n";
894     if (result != 0) {
895         error(user_context) << "remote_set_performance_mode failed.\n";
896         return result;
897     }
898 
899     return 0;
900 }
901 
halide_hexagon_set_performance(void * user_context,halide_hexagon_power_t * perf)902 WEAK int halide_hexagon_set_performance(void *user_context, halide_hexagon_power_t *perf) {
903     int result = init_hexagon_runtime(user_context);
904     if (result != 0) return result;
905 
906     debug(user_context) << "halide_hexagon_set_performance\n";
907     if (!remote_set_performance) {
908         // This runtime doesn't support changing the performance target.
909         return 0;
910     }
911 
912     debug(user_context) << "    remote_set_performance -> ";
913     result = remote_set_performance(perf->set_mips,
914                                     perf->mipsPerThread,
915                                     perf->mipsTotal,
916                                     perf->set_bus_bw,
917                                     perf->bwMegabytesPerSec,
918                                     perf->busbwUsagePercentage,
919                                     perf->set_latency,
920                                     perf->latency);
921 
922     debug(user_context) << "        " << result << "\n";
923     if (result != 0) {
924         error(user_context) << "remote_set_performance failed.\n";
925         return result;
926     }
927 
928     return 0;
929 }
930 
halide_hexagon_set_thread_priority(void * user_context,int priority)931 WEAK int halide_hexagon_set_thread_priority(void *user_context, int priority) {
932     int result = init_hexagon_runtime(user_context);
933     if (result != 0) return result;
934 
935     debug(user_context) << "halide_hexagon_set_thread_priority\n";
936     if (!remote_set_thread_priority) {
937         // This runtime doesn't support changing the thread priority.
938         return 0;
939     }
940 
941     debug(user_context) << "    remote_set_thread_priority -> ";
942     result = remote_set_thread_priority(priority);
943     debug(user_context) << "        " << result << "\n";
944     if (result != 0) {
945         error(user_context) << "remote_set_thread_priority failed.\n";
946         return result;
947     }
948 
949     return 0;
950 }
951 
halide_hexagon_device_interface()952 WEAK const halide_device_interface_t *halide_hexagon_device_interface() {
953     return &hexagon_device_interface;
954 }
955 
956 namespace {
halide_hexagon_cleanup()957 WEAK __attribute__((destructor)) void halide_hexagon_cleanup() {
958     halide_hexagon_device_release(NULL);
959 }
960 }  // namespace
961 
962 }  // extern "C" linkage
963 
964 namespace Halide {
965 namespace Runtime {
966 namespace Internal {
967 namespace Hexagon {
968 
969 WEAK halide_device_interface_impl_t hexagon_device_interface_impl = {
970     halide_use_jit_module,
971     halide_release_jit_module,
972     halide_hexagon_device_malloc,
973     halide_hexagon_device_free,
974     halide_hexagon_device_sync,
975     halide_hexagon_device_release,
976     halide_hexagon_copy_to_host,
977     halide_hexagon_copy_to_device,
978     halide_hexagon_device_and_host_malloc,
979     halide_hexagon_device_and_host_free,
980     halide_hexagon_buffer_copy,
981     halide_hexagon_device_crop,
982     halide_hexagon_device_slice,
983     halide_hexagon_device_release_crop,
984     halide_default_device_wrap_native,
985     halide_default_device_detach_native,
986 };
987 
988 WEAK halide_device_interface_t hexagon_device_interface = {
989     halide_device_malloc,
990     halide_device_free,
991     halide_device_sync,
992     halide_device_release,
993     halide_copy_to_host,
994     halide_copy_to_device,
995     halide_device_and_host_malloc,
996     halide_device_and_host_free,
997     halide_buffer_copy,
998     halide_device_crop,
999     halide_device_slice,
1000     halide_device_release_crop,
1001     halide_device_wrap_native,
1002     halide_device_detach_native,
1003     NULL,
1004     &hexagon_device_interface_impl};
1005 
1006 }  // namespace Hexagon
1007 }  // namespace Internal
1008 }  // namespace Runtime
1009 }  // namespace Halide
1010