1 #include "HalideRuntimeHexagonHost.h"
2 #include "device_buffer_utils.h"
3 #include "device_interface.h"
4 #include "printer.h"
5 #include "runtime_internal.h"
6 #include "scoped_mutex_lock.h"
7
8 namespace Halide {
9 namespace Runtime {
10 namespace Internal {
11 namespace Hexagon {
12
// Record stored (as an integer) in halide_buffer_t::device for buffers that
// use the Hexagon device interface. It is heap-allocated when a handle is
// wrapped or cropped and freed when the handle is detached or the crop is
// released.
struct ion_device_handle {
    // Start of the allocation backing the device buffer.
    void *buffer;
    // Size associated with `buffer`, as supplied when the handle was created.
    size_t size;
};
17
// Mutex held by halide_hexagon_initialize_kernels and
// halide_hexagon_device_release while they touch state_list and
// shared_runtime.
WEAK halide_mutex thread_lock = {{0}};

// The Hexagon device interface object; its definition is not in this part of
// the file. Buffers wrapped by this runtime point at it.
extern WEAK halide_device_interface_t hexagon_device_interface;
21
// Define dynamic version of hexagon_remote/halide_hexagon_remote.h
// A remote_buffer is a (pointer, length) pair describing one argument passed
// to the remote run entry point.
typedef struct _remote_buffer__seq_octet _remote_buffer__seq_octet;
typedef _remote_buffer__seq_octet remote_buffer;
struct _remote_buffer__seq_octet {
    // First byte of the argument data.
    unsigned char *data;
    // Length of the data, stored as an int.
    int dataLen;
};
29
// Function pointer types for the entry points resolved at runtime from the
// Hexagon host support library (see init_hexagon_runtime).

// (soname, soname length, code, code size, out: module handle) -> 0 on success.
typedef int (*remote_load_library_fn)(const char *, int, const unsigned char *, int, halide_hexagon_handle_t *);
// (module, symbol name, name length including the terminator, out: symbol handle) -> 0 on success.
typedef int (*remote_get_symbol_fn)(halide_hexagon_handle_t, const char *, int, halide_hexagon_handle_t *);
// (module, function, input buffers + count, output buffers + count,
//  input scalars + count) -> 0 on success.
typedef int (*remote_run_fn)(halide_hexagon_handle_t, int,
                             const remote_buffer *, int, const remote_buffer *, int,
                             remote_buffer *, int);
// (module) -> result code.
typedef int (*remote_release_library_fn)(halide_hexagon_handle_t);
// (message buffer, buffer size, out: amount read) -> 0 on success.
typedef int (*remote_poll_log_fn)(char *, int, int *);
// (out: func, out: threads).
typedef void (*remote_poll_profiler_state_fn)(int *, int *);
// (current func id) -> result code.
typedef int (*remote_profiler_set_current_func_fn)(int);
// Power and performance controls; the callers are outside the visible part of
// this file.
typedef int (*remote_power_fn)();
typedef int (*remote_power_mode_fn)(int);
typedef int (*remote_power_perf_fn)(int, unsigned int, unsigned int, int, unsigned int, unsigned int, int, int);
typedef int (*remote_thread_priority_fn)(int);

// Host-side allocator entry points provided by the same support library.
typedef void (*host_malloc_init_fn)();
typedef void *(*host_malloc_fn)(size_t);
typedef void (*host_free_fn)(void *);
47
// Entry points filled in by init_hexagon_runtime. They start out NULL;
// init_hexagon_runtime treats remote_load_library, remote_run and
// remote_release_library all being non-NULL as "already loaded".
WEAK remote_load_library_fn remote_load_library = NULL;
WEAK remote_get_symbol_fn remote_get_symbol = NULL;
WEAK remote_run_fn remote_run = NULL;
WEAK remote_release_library_fn remote_release_library = NULL;
// The remaining remote_* entry points are looked up as optional symbols and
// may stay NULL; callers must check before using them.
WEAK remote_poll_log_fn remote_poll_log = NULL;
WEAK remote_poll_profiler_state_fn remote_poll_profiler_state = NULL;
WEAK remote_profiler_set_current_func_fn remote_profiler_set_current_func = NULL;
WEAK remote_power_fn remote_power_hvx_on = NULL;
WEAK remote_power_fn remote_power_hvx_off = NULL;
WEAK remote_power_perf_fn remote_set_performance = NULL;
WEAK remote_power_mode_fn remote_set_performance_mode = NULL;
WEAK remote_thread_priority_fn remote_set_thread_priority = NULL;

// Host allocator entry points; all four are required symbols.
WEAK host_malloc_init_fn host_malloc_init = NULL;
WEAK host_malloc_init_fn host_malloc_deinit = NULL;
WEAK host_malloc_fn host_malloc = NULL;
WEAK host_free_fn host_free = NULL;
65
66 // This checks if there are any log messages available on the remote
67 // side. It should be called after every remote call.
poll_log(void * user_context)68 WEAK void poll_log(void *user_context) {
69 if (!remote_poll_log) return;
70
71 while (true) {
72 char message[1024];
73 int read = 0;
74 int result = remote_poll_log(&message[0], sizeof(message), &read);
75 if (result != 0) {
76 // Don't make this an error, otherwise we might obscure
77 // more information about errors that would come later.
78 print(user_context) << "Hexagon: remote_poll_log failed " << result << "\n";
79 return;
80 }
81
82 if (read > 0) {
83 halide_print(user_context, message);
84 } else {
85 break;
86 }
87 }
88 }
89
get_remote_profiler_state(int * func,int * threads)90 WEAK void get_remote_profiler_state(int *func, int *threads) {
91 if (!remote_poll_profiler_state) {
92 // This should only have been called if there's a remote profiler func installed.
93 error(NULL) << "Hexagon: remote_poll_profiler_func not found\n";
94 }
95
96 remote_poll_profiler_state(func, threads);
97 }
98
99 template<typename T>
get_symbol(void * user_context,void * host_lib,const char * name,T & sym,bool required=true)100 ALWAYS_INLINE void get_symbol(void *user_context, void *host_lib, const char *name, T &sym, bool required = true) {
101 debug(user_context) << " halide_get_library_symbol('" << name << "') -> \n";
102 sym = (T)halide_get_library_symbol(host_lib, name);
103 debug(user_context) << " " << (void *)sym << "\n";
104 if (!sym && required) {
105 error(user_context) << "Required Hexagon runtime symbol '" << name << "' not found.\n";
106 }
107 }
108
109 // Load the hexagon remote runtime.
init_hexagon_runtime(void * user_context)110 WEAK int init_hexagon_runtime(void *user_context) {
111 if (remote_load_library && remote_run && remote_release_library) {
112 // Already loaded.
113 return 0;
114 }
115
116 // The "support library" for Hexagon is essentially a way to delegate Hexagon
117 // code execution based on the runtime; devices with Hexagon hardware will
118 // simply provide conduits for execution on that hardware, while test/desktop/etc
119 // environments can instead connect a simulator via the API.
120 // Load the .so for Linux or Android, and if that fails try the .dll
121 // as we may be running the windows hosted simulator.
122 void *host_lib = halide_load_library("libhalide_hexagon_host.so");
123 if (!host_lib) {
124 host_lib = halide_load_library("libhalide_hexagon_host.dll");
125 }
126
127 debug(user_context) << "Hexagon: init_hexagon_runtime (user_context: " << user_context << ")\n";
128
129 // Get the symbols we need from the library.
130 get_symbol(user_context, host_lib, "halide_hexagon_remote_load_library", remote_load_library);
131 if (!remote_load_library) return -1;
132 get_symbol(user_context, host_lib, "halide_hexagon_remote_get_symbol_v4", remote_get_symbol);
133 if (!remote_get_symbol) return -1;
134 get_symbol(user_context, host_lib, "halide_hexagon_remote_run", remote_run);
135 if (!remote_run) return -1;
136 get_symbol(user_context, host_lib, "halide_hexagon_remote_release_library", remote_release_library);
137 if (!remote_release_library) return -1;
138
139 get_symbol(user_context, host_lib, "halide_hexagon_host_malloc_init", host_malloc_init);
140 if (!host_malloc_init) return -1;
141 get_symbol(user_context, host_lib, "halide_hexagon_host_malloc_deinit", host_malloc_deinit);
142 if (!host_malloc_deinit) return -1;
143 get_symbol(user_context, host_lib, "halide_hexagon_host_malloc", host_malloc);
144 if (!host_malloc) return -1;
145 get_symbol(user_context, host_lib, "halide_hexagon_host_free", host_free);
146 if (!host_free) return -1;
147
148 // These symbols are optional.
149 get_symbol(user_context, host_lib, "halide_hexagon_remote_poll_log", remote_poll_log, /* required */ false);
150 get_symbol(user_context, host_lib, "halide_hexagon_remote_poll_profiler_state", remote_poll_profiler_state, /* required */ false);
151 get_symbol(user_context, host_lib, "halide_hexagon_remote_profiler_set_current_func", remote_profiler_set_current_func, /* required */ false);
152
153 // If these are unavailable, then the runtime always powers HVX on and so these are not necessary.
154 get_symbol(user_context, host_lib, "halide_hexagon_remote_power_hvx_on", remote_power_hvx_on, /* required */ false);
155 get_symbol(user_context, host_lib, "halide_hexagon_remote_power_hvx_off", remote_power_hvx_off, /* required */ false);
156 get_symbol(user_context, host_lib, "halide_hexagon_remote_set_performance", remote_set_performance, /* required */ false);
157 get_symbol(user_context, host_lib, "halide_hexagon_remote_set_performance_mode", remote_set_performance_mode, /* required */ false);
158 get_symbol(user_context, host_lib, "halide_hexagon_remote_set_thread_priority", remote_set_thread_priority, /* required */ false);
159
160 host_malloc_init();
161
162 return 0;
163 }
164
// Structure to hold the state of a module attached to the context.
// Also used as a linked list to keep track of all the different
// modules that are attached to a context in order to release them all
// when the context is released.
struct module_state {
    // Remote handle of the loaded module; 0 when no module is loaded.
    halide_hexagon_handle_t module;
    // Next node in state_list.
    module_state *next;
};
// Head of the list of module_state nodes created by
// halide_hexagon_initialize_kernels.
WEAK module_state *state_list = NULL;
// Remote handle of the shared runtime library; 0 until it has been loaded.
WEAK halide_hexagon_handle_t shared_runtime = 0;
175
176 #ifdef DEBUG_RUNTIME
177
178 // In debug builds, we write shared objects to the current directory (without
179 // failing on errors).
write_shared_object(void * user_context,const char * path,const uint8_t * code,uint64_t code_size)180 WEAK void write_shared_object(void *user_context, const char *path,
181 const uint8_t *code, uint64_t code_size) {
182 void *f = fopen(path, "wb");
183 if (!f) {
184 debug(user_context) << " failed to write shared object to '" << path << "'\n";
185 return;
186 }
187 size_t written = fwrite(code, 1, code_size, f);
188 if (written != code_size) {
189 debug(user_context) << " bad write of shared object to '" << path << "'\n";
190 }
191 fclose(f);
192 }
193
194 #endif
195
196 } // namespace Hexagon
197 } // namespace Internal
198 } // namespace Runtime
199 } // namespace Halide
200
201 using namespace Halide::Runtime::Internal;
202 using namespace Halide::Runtime::Internal::Hexagon;
203
204 extern "C" {
205
halide_is_hexagon_available(void * user_context)206 WEAK bool halide_is_hexagon_available(void *user_context) {
207 int result = init_hexagon_runtime(user_context);
208 return result == 0;
209 }
210
halide_hexagon_initialize_kernels(void * user_context,void ** state_ptr,const uint8_t * code,uint64_t code_size,const uint8_t * runtime,uint64_t runtime_size)211 WEAK int halide_hexagon_initialize_kernels(void *user_context, void **state_ptr,
212 const uint8_t *code, uint64_t code_size,
213 const uint8_t *runtime, uint64_t runtime_size) {
214 int result = init_hexagon_runtime(user_context);
215 if (result != 0) return result;
216 debug(user_context) << "Hexagon: halide_hexagon_initialize_kernels (user_context: " << user_context
217 << ", state_ptr: " << state_ptr
218 << ", *state_ptr: " << *state_ptr
219 << ", code: " << code
220 << ", code_size: " << (int)code_size << ")\n"
221 << ", code: " << runtime
222 << ", code_size: " << (int)runtime_size << ")\n";
223 halide_assert(user_context, state_ptr != NULL);
224
225 #ifdef DEBUG_RUNTIME
226 uint64_t t_before = halide_current_time_ns(user_context);
227 #endif
228
229 // Create the state object if necessary. This only happens once,
230 // regardless of how many times halide_hexagon_initialize_kernels
231 // or halide_hexagon_device_release is called.
232 // halide_hexagon_device_release traverses this list and releases
233 // the module objects, but it does not modify the list nodes
234 // created/inserted here.
235 ScopedMutexLock lock(&thread_lock);
236
237 // Initialize the runtime, if necessary.
238 if (!shared_runtime) {
239 debug(user_context) << " Initializing shared runtime\n";
240 const char soname[] = "libhalide_shared_runtime.so";
241 #ifdef DEBUG_RUNTIME
242 debug(user_context) << " Writing shared object '" << soname << "'\n";
243 write_shared_object(user_context, soname, runtime, runtime_size);
244 #endif
245 debug(user_context) << " halide_remote_load_library(" << soname << ") -> ";
246 result = remote_load_library(soname, sizeof(soname), runtime, runtime_size, &shared_runtime);
247 poll_log(user_context);
248 if (result == 0) {
249 debug(user_context) << " " << (void *)(size_t)shared_runtime << "\n";
250 halide_assert(user_context, shared_runtime != 0);
251 } else {
252 debug(user_context) << " " << result << "\n";
253 error(user_context) << "Initialization of Hexagon kernels failed\n";
254 shared_runtime = 0;
255 }
256 } else {
257 debug(user_context) << " re-using existing shared runtime " << (void *)(size_t)shared_runtime << "\n";
258 }
259
260 if (result != 0) {
261 return -1;
262 }
263
264 module_state **state = (module_state **)state_ptr;
265 if (!(*state)) {
266 debug(user_context) << " allocating module state -> \n";
267 *state = (module_state *)malloc(sizeof(module_state));
268 debug(user_context) << " " << *state << "\n";
269 (*state)->module = 0;
270 (*state)->next = state_list;
271 state_list = *state;
272 }
273
274 // Create the module itself if necessary.
275 if (!(*state)->module) {
276 static int unique_id = 0;
277 stringstream soname(user_context);
278 soname << "libhalide_kernels" << unique_id++ << ".so";
279 #ifdef DEBUG_RUNTIME
280 debug(user_context) << " Writing shared object '" << soname.str() << "'\n";
281 write_shared_object(user_context, soname.str(), code, code_size);
282 #endif
283 debug(user_context) << " halide_remote_load_library(" << soname.str() << ") -> ";
284 halide_hexagon_handle_t module = 0;
285 result = remote_load_library(soname.str(), soname.size() + 1, code, code_size, &module);
286 poll_log(user_context);
287 if (result == 0) {
288 debug(user_context) << " " << (void *)(size_t)module << "\n";
289 (*state)->module = module;
290 } else {
291 debug(user_context) << " " << result << "\n";
292 error(user_context) << "Initialization of Hexagon kernels failed\n";
293 }
294 } else {
295 debug(user_context) << " re-using existing module " << (void *)(size_t)(*state)->module << "\n";
296 }
297
298 #ifdef DEBUG_RUNTIME
299 uint64_t t_after = halide_current_time_ns(user_context);
300 debug(user_context) << " Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
301 #endif
302
303 return result != 0 ? -1 : 0;
304 }
305
306 namespace {
307
308 // Prepare an array of remote_buffer arguments, mapping buffers if
309 // necessary. Only arguments with flags&flag_mask == flag_value are
310 // added to the mapped_args array. Returns the number of arguments
311 // mapped, or a negative number on error.
map_arguments(void * user_context,int arg_count,uint64_t arg_sizes[],void * args[],int arg_flags[],int flag_mask,int flag_value,remote_buffer * mapped_args)312 WEAK int map_arguments(void *user_context, int arg_count,
313 uint64_t arg_sizes[], void *args[], int arg_flags[], int flag_mask, int flag_value,
314 remote_buffer *mapped_args) {
315 int mapped_count = 0;
316 for (int i = 0; i < arg_count; i++) {
317 if ((arg_flags[i] & flag_mask) != flag_value) continue;
318 remote_buffer &mapped_arg = mapped_args[mapped_count++];
319 if (arg_flags[i] != 0) {
320 // This is the way that HexagonOffload packages arguments for us.
321 struct hexagon_device_pointer {
322 uint64_t dev;
323 uint8_t *host;
324 };
325 const hexagon_device_pointer *b = (hexagon_device_pointer *)args[i];
326 uint64_t device = b->dev;
327 uint8_t *host = b->host;
328 if (device) {
329 // This argument has a device handle.
330 ion_device_handle *ion_handle = reinterpret<ion_device_handle *>(device);
331 debug(user_context) << i << ", " << device << "\n";
332 mapped_arg.data = reinterpret_cast<uint8_t *>(ion_handle->buffer);
333 mapped_arg.dataLen = ion_handle->size;
334 } else {
335 // This is just a host buffer, and the size is passed in as the arg size.
336 mapped_arg.data = host;
337 mapped_arg.dataLen = arg_sizes[i];
338 }
339 } else {
340 // This is a scalar, just put the pointer/size in the result.
341 mapped_arg.data = (uint8_t *)args[i];
342 mapped_arg.dataLen = arg_sizes[i];
343 }
344 }
345 return mapped_count;
346 }
347
348 } // namespace
349
halide_hexagon_run(void * user_context,void * state_ptr,const char * name,halide_hexagon_handle_t * function,uint64_t arg_sizes[],void * args[],int arg_flags[])350 WEAK int halide_hexagon_run(void *user_context,
351 void *state_ptr,
352 const char *name,
353 halide_hexagon_handle_t *function,
354 uint64_t arg_sizes[],
355 void *args[],
356 int arg_flags[]) {
357 halide_assert(user_context, state_ptr != NULL);
358 halide_assert(user_context, function != NULL);
359 int result = init_hexagon_runtime(user_context);
360 if (result != 0) return result;
361
362 halide_hexagon_handle_t module = state_ptr ? ((module_state *)state_ptr)->module : 0;
363 debug(user_context) << "Hexagon: halide_hexagon_run ("
364 << "user_context: " << user_context << ", "
365 << "state_ptr: " << state_ptr << " (" << module << "), "
366 << "name: " << name << ", "
367 << "function: " << function << " (" << *function << "))\n";
368
369 // If we haven't gotten the symbol for this function, do so now.
370 if (*function == 0) {
371 debug(user_context) << " halide_hexagon_remote_get_symbol " << name << " -> ";
372 halide_hexagon_handle_t sym = 0;
373 int result = remote_get_symbol(module, name, strlen(name) + 1, &sym);
374 *function = result == 0 ? sym : 0;
375 poll_log(user_context);
376 debug(user_context) << " " << *function << "\n";
377 if (*function == 0) {
378 error(user_context) << "Failed to find function " << name << " in module.\n";
379 return -1;
380 }
381 }
382
383 // Allocate some remote_buffer objects on the stack.
384 int arg_count = 0;
385 while (arg_sizes[arg_count] > 0)
386 arg_count++;
387 remote_buffer *mapped_buffers =
388 (remote_buffer *)__builtin_alloca(arg_count * sizeof(remote_buffer));
389
390 // Map the arguments.
391 // First grab the input buffers (bit 0 of flags is set).
392 remote_buffer *input_buffers = mapped_buffers;
393 int input_buffer_count = map_arguments(user_context, arg_count, arg_sizes, args, arg_flags, 0x3, 0x1,
394 input_buffers);
395 if (input_buffer_count < 0) return input_buffer_count;
396
397 // Then the output buffers (bit 1 of flags is set).
398 remote_buffer *output_buffers = input_buffers + input_buffer_count;
399 int output_buffer_count = map_arguments(user_context, arg_count, arg_sizes, args, arg_flags, 0x2, 0x2,
400 output_buffers);
401 if (output_buffer_count < 0) return output_buffer_count;
402
403 // And the input scalars (neither bits 0 or 1 of flags is set).
404 remote_buffer *input_scalars = output_buffers + output_buffer_count;
405 int input_scalar_count = map_arguments(user_context, arg_count, arg_sizes, args, arg_flags, 0x3, 0x0,
406 input_scalars);
407 if (input_scalar_count < 0) return input_scalar_count;
408
409 #ifdef DEBUG_RUNTIME
410 uint64_t t_before = halide_current_time_ns(user_context);
411 #endif
412
413 // If remote profiling is supported, tell the profiler to call
414 // get_remote_profiler_func to retrieve the current
415 // func. Otherwise leave it alone - the cost of remote running
416 // will be billed to the calling Func.
417 if (remote_poll_profiler_state) {
418 halide_profiler_get_state()->get_remote_profiler_state = get_remote_profiler_state;
419 if (remote_profiler_set_current_func) {
420 remote_profiler_set_current_func(halide_profiler_get_state()->current_func);
421 }
422 }
423
424 // Call the pipeline on the device side.
425 debug(user_context) << " halide_hexagon_remote_run -> ";
426 result = remote_run(module, *function,
427 input_buffers, input_buffer_count,
428 output_buffers, output_buffer_count,
429 input_scalars, input_scalar_count);
430 poll_log(user_context);
431 debug(user_context) << " " << result << "\n";
432 if (result != 0) {
433 error(user_context) << "Hexagon pipeline failed.\n";
434 return result;
435 }
436
437 halide_profiler_get_state()->get_remote_profiler_state = NULL;
438
439 #ifdef DEBUG_RUNTIME
440 uint64_t t_after = halide_current_time_ns(user_context);
441 debug(user_context) << " Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
442 #endif
443
444 return result != 0 ? -1 : 0;
445 }
446
halide_hexagon_device_release(void * user_context)447 WEAK int halide_hexagon_device_release(void *user_context) {
448 debug(user_context)
449 << "Hexagon: halide_hexagon_device_release (user_context: " << user_context << ")\n";
450
451 ScopedMutexLock lock(&thread_lock);
452
453 // Release all of the remote side modules.
454 module_state *state = state_list;
455 while (state) {
456 if (state->module) {
457 debug(user_context) << " halide_remote_release_library " << state
458 << " (" << state->module << ") -> ";
459 int result = remote_release_library(state->module);
460 poll_log(user_context);
461 debug(user_context) << " " << result << "\n";
462 state->module = 0;
463 }
464 state = state->next;
465 }
466 state_list = NULL;
467
468 if (shared_runtime) {
469 debug(user_context) << " releasing shared runtime\n";
470 debug(user_context) << " halide_remote_release_library " << shared_runtime << " -> ";
471 int result = remote_release_library(shared_runtime);
472 poll_log(user_context);
473 debug(user_context) << " " << result << "\n";
474 shared_runtime = 0;
475 }
476
477 return 0;
478 }
479
// When allocations for Hexagon are at least as large as this
// threshold, use an ION allocation (to get zero copy). If the
// allocation is smaller, use a standard allocation instead. This is
// done because allocating an entire page for a small allocation is
// wasteful, and the copy is not significant. Additionally, the
// FastRPC interface can probably do a better job with many small
// arguments than simply mapping the pages.
//
// halide_hexagon_device_free compares the recorded handle size against
// this same threshold to choose the matching deallocator.
static const int min_ion_allocation_size = 4096;
488
halide_hexagon_device_malloc(void * user_context,halide_buffer_t * buf)489 WEAK int halide_hexagon_device_malloc(void *user_context, halide_buffer_t *buf) {
490 int result = init_hexagon_runtime(user_context);
491 if (result != 0) return result;
492
493 debug(user_context)
494 << "Hexagon: halide_hexagon_device_malloc (user_context: " << user_context
495 << ", buf: " << buf << ")\n";
496
497 if (buf->device) {
498 // This buffer already has a device allocation
499 return 0;
500 }
501
502 size_t size = buf->size_in_bytes();
503 halide_assert(user_context, size != 0);
504
505 // Hexagon code generation generates clamped ramp loads in a way
506 // that requires up to an extra vector beyond the end of the
507 // buffer to be legal to access.
508 size += 128;
509
510 for (int i = 0; i < buf->dimensions; i++) {
511 halide_assert(user_context, buf->dim[i].stride >= 0);
512 }
513
514 debug(user_context) << " allocating buffer of " << (uint64_t)size << " bytes\n";
515
516 #ifdef DEBUG_RUNTIME
517 uint64_t t_before = halide_current_time_ns(user_context);
518 #endif
519
520 void *ion;
521 if (size >= min_ion_allocation_size) {
522 debug(user_context) << " host_malloc len=" << (uint64_t)size << " -> ";
523 ion = host_malloc(size);
524 debug(user_context) << " " << ion << "\n";
525 if (!ion) {
526 error(user_context) << "host_malloc failed\n";
527 return -1;
528 }
529 } else {
530 debug(user_context) << " halide_malloc size=" << (uint64_t)size << " -> ";
531 ion = halide_malloc(user_context, size);
532 debug(user_context) << " " << ion << "\n";
533 if (!ion) {
534 error(user_context) << "halide_malloc failed\n";
535 return -1;
536 }
537 }
538
539 int err = halide_hexagon_wrap_device_handle(user_context, buf, ion, size);
540 if (err != 0) {
541 if (size >= min_ion_allocation_size) {
542 host_free(ion);
543 } else {
544 halide_free(user_context, ion);
545 }
546 return err;
547 }
548
549 if (!buf->host) {
550 // If the host pointer has also not been allocated yet, set it to
551 // the ion buffer. This buffer will be zero copy.
552 buf->host = (uint8_t *)ion;
553 debug(user_context) << " host <- " << buf->host << "\n";
554 }
555
556 #ifdef DEBUG_RUNTIME
557 uint64_t t_after = halide_current_time_ns(user_context);
558 debug(user_context) << " Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
559 #endif
560
561 return 0;
562 }
563
halide_hexagon_device_free(void * user_context,halide_buffer_t * buf)564 WEAK int halide_hexagon_device_free(void *user_context, halide_buffer_t *buf) {
565 debug(user_context)
566 << "Hexagon: halide_hexagon_device_free (user_context: " << user_context
567 << ", buf: " << buf << ")\n";
568
569 #ifdef DEBUG_RUNTIME
570 uint64_t t_before = halide_current_time_ns(user_context);
571 #endif
572
573 uint64_t size = halide_hexagon_get_device_size(user_context, buf);
574 void *ion = halide_hexagon_get_device_handle(user_context, buf);
575 halide_hexagon_detach_device_handle(user_context, buf);
576 if (size >= min_ion_allocation_size) {
577 debug(user_context) << " host_free ion=" << ion << "\n";
578 host_free(ion);
579 } else {
580 debug(user_context) << " halide_free ion=" << ion << "\n";
581 halide_free(user_context, ion);
582 }
583
584 if (buf->host == ion) {
585 // If we also set the host pointer, reset it.
586 buf->host = NULL;
587 debug(user_context) << " host <- 0x0\n";
588 }
589
590 #ifdef DEBUG_RUNTIME
591 uint64_t t_after = halide_current_time_ns(user_context);
592 debug(user_context) << " Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
593 #endif
594
595 // This is to match what the default implementation of halide_device_free does.
596 buf->set_device_dirty(false);
597 return 0;
598 }
599
halide_hexagon_copy_to_device(void * user_context,halide_buffer_t * buf)600 WEAK int halide_hexagon_copy_to_device(void *user_context, halide_buffer_t *buf) {
601 int err = halide_hexagon_device_malloc(user_context, buf);
602 if (err) {
603 return err;
604 }
605
606 debug(user_context)
607 << "Hexagon: halide_hexagon_copy_to_device (user_context: " << user_context
608 << ", buf: " << buf << ")\n";
609
610 #ifdef DEBUG_RUNTIME
611 uint64_t t_before = halide_current_time_ns(user_context);
612 #endif
613
614 halide_assert(user_context, buf->host && buf->device);
615 device_copy c = make_host_to_device_copy(buf);
616
617 // Get the descriptor associated with the ion buffer.
618 c.dst = reinterpret<uintptr_t>(halide_hexagon_get_device_handle(user_context, buf));
619 copy_memory(c, user_context);
620
621 #ifdef DEBUG_RUNTIME
622 uint64_t t_after = halide_current_time_ns(user_context);
623 debug(user_context) << " Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
624 #endif
625
626 return 0;
627 }
628
halide_hexagon_copy_to_host(void * user_context,struct halide_buffer_t * buf)629 WEAK int halide_hexagon_copy_to_host(void *user_context, struct halide_buffer_t *buf) {
630 debug(user_context)
631 << "Hexagon: halide_hexagon_copy_to_host (user_context: " << user_context
632 << ", buf: " << buf << ")\n";
633
634 #ifdef DEBUG_RUNTIME
635 uint64_t t_before = halide_current_time_ns(user_context);
636 #endif
637
638 halide_assert(user_context, buf->host && buf->device);
639 device_copy c = make_device_to_host_copy(buf);
640
641 // Get the descriptor associated with the ion buffer.
642 c.src = reinterpret<uintptr_t>(halide_hexagon_get_device_handle(user_context, buf));
643 copy_memory(c, user_context);
644
645 #ifdef DEBUG_RUNTIME
646 uint64_t t_after = halide_current_time_ns(user_context);
647 debug(user_context) << " Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
648 #endif
649
650 return 0;
651 }
652
halide_hexagon_device_sync(void * user_context,struct halide_buffer_t *)653 WEAK int halide_hexagon_device_sync(void *user_context, struct halide_buffer_t *) {
654 debug(user_context)
655 << "Hexagon: halide_hexagon_device_sync (user_context: " << user_context << ")\n";
656 // Nothing to do.
657 return 0;
658 }
659
halide_hexagon_wrap_device_handle(void * user_context,struct halide_buffer_t * buf,void * ion_buf,uint64_t size)660 WEAK int halide_hexagon_wrap_device_handle(void *user_context, struct halide_buffer_t *buf,
661 void *ion_buf, uint64_t size) {
662 halide_assert(user_context, buf->device == 0);
663 if (buf->device != 0) {
664 return -2;
665 }
666
667 ion_device_handle *handle = (ion_device_handle *)malloc(sizeof(ion_device_handle));
668 if (!handle) {
669 return -1;
670 }
671 handle->buffer = ion_buf;
672 handle->size = size;
673 buf->device_interface = &hexagon_device_interface;
674 buf->device_interface->impl->use_module();
675 buf->device = reinterpret<uint64_t>(handle);
676 return 0;
677 }
678
halide_hexagon_detach_device_handle(void * user_context,struct halide_buffer_t * buf)679 WEAK int halide_hexagon_detach_device_handle(void *user_context, struct halide_buffer_t *buf) {
680 if (buf->device == 0) {
681 return NULL;
682 }
683 halide_assert(user_context, buf->device_interface == &hexagon_device_interface);
684 ion_device_handle *handle = reinterpret<ion_device_handle *>(buf->device);
685 free(handle);
686
687 buf->device_interface->impl->release_module();
688 buf->device = 0;
689 buf->device_interface = NULL;
690 return 0;
691 }
692
halide_hexagon_get_device_handle(void * user_context,struct halide_buffer_t * buf)693 WEAK void *halide_hexagon_get_device_handle(void *user_context, struct halide_buffer_t *buf) {
694 if (buf->device == 0) {
695 return NULL;
696 }
697 halide_assert(user_context, buf->device_interface == &hexagon_device_interface);
698 ion_device_handle *handle = reinterpret<ion_device_handle *>(buf->device);
699 return handle->buffer;
700 }
701
halide_hexagon_get_device_size(void * user_context,struct halide_buffer_t * buf)702 WEAK uint64_t halide_hexagon_get_device_size(void *user_context, struct halide_buffer_t *buf) {
703 if (buf->device == 0) {
704 return 0;
705 }
706 halide_assert(user_context, buf->device_interface == &hexagon_device_interface);
707 ion_device_handle *handle = reinterpret<ion_device_handle *>(buf->device);
708 return handle->size;
709 }
710
halide_hexagon_device_and_host_malloc(void * user_context,struct halide_buffer_t * buf)711 WEAK int halide_hexagon_device_and_host_malloc(void *user_context, struct halide_buffer_t *buf) {
712 debug(user_context) << "halide_hexagon_device_and_host_malloc called.\n";
713 int result = halide_hexagon_device_malloc(user_context, buf);
714 if (result == 0) {
715 buf->host = (uint8_t *)halide_hexagon_get_device_handle(user_context, buf);
716 }
717 return result;
718 }
719
halide_hexagon_device_and_host_free(void * user_context,struct halide_buffer_t * buf)720 WEAK int halide_hexagon_device_and_host_free(void *user_context, struct halide_buffer_t *buf) {
721 debug(user_context) << "halide_hexagon_device_and_host_free called.\n";
722 halide_hexagon_device_free(user_context, buf);
723 buf->host = NULL;
724 return 0;
725 }
726
halide_hexagon_buffer_copy(void * user_context,struct halide_buffer_t * src,const struct halide_device_interface_t * dst_device_interface,struct halide_buffer_t * dst)727 WEAK int halide_hexagon_buffer_copy(void *user_context, struct halide_buffer_t *src,
728 const struct halide_device_interface_t *dst_device_interface,
729 struct halide_buffer_t *dst) {
730 // We only handle copies to hexagon buffers or to host
731 halide_assert(user_context, dst_device_interface == NULL ||
732 dst_device_interface == &hexagon_device_interface);
733
734 if ((src->device_dirty() || src->host == NULL) &&
735 src->device_interface != &hexagon_device_interface) {
736 halide_assert(user_context, dst_device_interface == &hexagon_device_interface);
737 // This is handled at the higher level.
738 return halide_error_code_incompatible_device_interface;
739 }
740
741 bool from_host = (src->device_interface != &hexagon_device_interface) ||
742 (src->device == 0) ||
743 (src->host_dirty() && src->host != NULL);
744 bool to_host = !dst_device_interface;
745
746 halide_assert(user_context, from_host || src->device);
747 halide_assert(user_context, to_host || dst->device);
748
749 #ifdef DEBUG_RUNTIME
750 uint64_t t_before = halide_current_time_ns(user_context);
751 #endif
752
753 device_copy c = make_buffer_copy(src, from_host, dst, to_host);
754
755 int err = 0;
756
757 // Get the descriptor associated with the ion buffer.
758 if (!from_host) {
759 c.src = reinterpret<uintptr_t>(halide_hexagon_get_device_handle(user_context, src));
760 }
761 if (!to_host) {
762 c.dst = reinterpret<uintptr_t>(halide_hexagon_get_device_handle(user_context, dst));
763 }
764 copy_memory(c, user_context);
765
766 #ifdef DEBUG_RUNTIME
767 uint64_t t_after = halide_current_time_ns(user_context);
768 debug(user_context) << " Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
769 #endif
770
771 return err;
772 }
773
774 namespace {
775
hexagon_device_crop_from_offset(const struct halide_buffer_t * src,int64_t offset,struct halide_buffer_t * dst)776 WEAK int hexagon_device_crop_from_offset(const struct halide_buffer_t *src, int64_t offset, struct halide_buffer_t *dst) {
777 ion_device_handle *src_handle = (ion_device_handle *)src->device;
778 ion_device_handle *dst_handle = (ion_device_handle *)malloc(sizeof(ion_device_handle));
779 if (!dst_handle) {
780 return halide_error_code_out_of_memory;
781 }
782
783 dst_handle->buffer = (uint8_t *)src_handle->buffer + offset;
784 dst_handle->size = src_handle->size - offset;
785 dst->device = reinterpret<uint64_t>(dst_handle);
786 dst->device_interface = src->device_interface;
787 dst->set_device_dirty(src->device_dirty());
788 return 0;
789 }
790
791 } // namespace
792
halide_hexagon_device_crop(void * user_context,const struct halide_buffer_t * src,struct halide_buffer_t * dst)793 WEAK int halide_hexagon_device_crop(void *user_context, const struct halide_buffer_t *src,
794 struct halide_buffer_t *dst) {
795 debug(user_context) << "halide_hexagon_device_crop called.\n";
796
797 const int64_t offset = calc_device_crop_byte_offset(src, dst);
798 return hexagon_device_crop_from_offset(src, offset, dst);
799 }
800
halide_hexagon_device_slice(void * user_context,const struct halide_buffer_t * src,int slice_dim,int slice_pos,struct halide_buffer_t * dst)801 WEAK int halide_hexagon_device_slice(void *user_context, const struct halide_buffer_t *src,
802 int slice_dim, int slice_pos, struct halide_buffer_t *dst) {
803 debug(user_context) << "halide_hexagon_device_slice called.\n";
804
805 const int64_t offset = calc_device_slice_byte_offset(src, slice_dim, slice_pos);
806 return hexagon_device_crop_from_offset(src, offset, dst);
807 }
808
halide_hexagon_device_release_crop(void * user_context,struct halide_buffer_t * dst)809 WEAK int halide_hexagon_device_release_crop(void *user_context, struct halide_buffer_t *dst) {
810 debug(user_context) << "halide_hexagon_release_crop called\n";
811 free((ion_device_handle *)dst->device);
812 return 0;
813 }
814
halide_hexagon_power_hvx_on(void * user_context)815 WEAK int halide_hexagon_power_hvx_on(void *user_context) {
816 int result = init_hexagon_runtime(user_context);
817 if (result != 0) return result;
818
819 debug(user_context) << "halide_hexagon_power_hvx_on\n";
820 if (!remote_power_hvx_on) {
821 // The function is not available in this version of the
822 // runtime, this runtime always powers HVX on.
823 return 0;
824 }
825
826 #ifdef DEBUG_RUNTIME
827 uint64_t t_before = halide_current_time_ns(user_context);
828 #endif
829
830 debug(user_context) << " remote_power_hvx_on -> ";
831 result = remote_power_hvx_on();
832 debug(user_context) << " " << result << "\n";
833 if (result != 0) {
834 error(user_context) << "remote_power_hvx_on failed.\n";
835 return result;
836 }
837
838 #ifdef DEBUG_RUNTIME
839 uint64_t t_after = halide_current_time_ns(user_context);
840 debug(user_context) << " Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
841 #endif
842
843 return 0;
844 }
845
halide_hexagon_power_hvx_off(void * user_context)846 WEAK int halide_hexagon_power_hvx_off(void *user_context) {
847 int result = init_hexagon_runtime(user_context);
848 if (result != 0) return result;
849
850 debug(user_context) << "halide_hexagon_power_hvx_off\n";
851 if (!remote_power_hvx_off) {
852 // The function is not available in this version of the
853 // runtime, this runtime always powers HVX on.
854 return 0;
855 }
856
857 #ifdef DEBUG_RUNTIME
858 uint64_t t_before = halide_current_time_ns(user_context);
859 #endif
860
861 debug(user_context) << " remote_power_hvx_off -> ";
862 result = remote_power_hvx_off();
863 debug(user_context) << " " << result << "\n";
864 if (result != 0) {
865 error(user_context) << "remote_power_hvx_off failed.\n";
866 return result;
867 }
868
869 #ifdef DEBUG_RUNTIME
870 uint64_t t_after = halide_current_time_ns(user_context);
871 debug(user_context) << " Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
872 #endif
873
874 return 0;
875 }
876
halide_hexagon_power_hvx_off_as_destructor(void * user_context,void *)877 WEAK void halide_hexagon_power_hvx_off_as_destructor(void *user_context, void * /* obj */) {
878 halide_hexagon_power_hvx_off(user_context);
879 }
880
halide_hexagon_set_performance_mode(void * user_context,halide_hexagon_power_mode_t mode)881 WEAK int halide_hexagon_set_performance_mode(void *user_context, halide_hexagon_power_mode_t mode) {
882 int result = init_hexagon_runtime(user_context);
883 if (result != 0) return result;
884
885 debug(user_context) << "halide_hexagon_set_performance_mode\n";
886 if (!remote_set_performance_mode) {
887 // This runtime doesn't support changing the performance target.
888 return 0;
889 }
890
891 debug(user_context) << " remote_set_performance_mode -> ";
892 result = remote_set_performance_mode(mode);
893 debug(user_context) << " " << result << "\n";
894 if (result != 0) {
895 error(user_context) << "remote_set_performance_mode failed.\n";
896 return result;
897 }
898
899 return 0;
900 }
901
halide_hexagon_set_performance(void * user_context,halide_hexagon_power_t * perf)902 WEAK int halide_hexagon_set_performance(void *user_context, halide_hexagon_power_t *perf) {
903 int result = init_hexagon_runtime(user_context);
904 if (result != 0) return result;
905
906 debug(user_context) << "halide_hexagon_set_performance\n";
907 if (!remote_set_performance) {
908 // This runtime doesn't support changing the performance target.
909 return 0;
910 }
911
912 debug(user_context) << " remote_set_performance -> ";
913 result = remote_set_performance(perf->set_mips,
914 perf->mipsPerThread,
915 perf->mipsTotal,
916 perf->set_bus_bw,
917 perf->bwMegabytesPerSec,
918 perf->busbwUsagePercentage,
919 perf->set_latency,
920 perf->latency);
921
922 debug(user_context) << " " << result << "\n";
923 if (result != 0) {
924 error(user_context) << "remote_set_performance failed.\n";
925 return result;
926 }
927
928 return 0;
929 }
930
halide_hexagon_set_thread_priority(void * user_context,int priority)931 WEAK int halide_hexagon_set_thread_priority(void *user_context, int priority) {
932 int result = init_hexagon_runtime(user_context);
933 if (result != 0) return result;
934
935 debug(user_context) << "halide_hexagon_set_thread_priority\n";
936 if (!remote_set_thread_priority) {
937 // This runtime doesn't support changing the thread priority.
938 return 0;
939 }
940
941 debug(user_context) << " remote_set_thread_priority -> ";
942 result = remote_set_thread_priority(priority);
943 debug(user_context) << " " << result << "\n";
944 if (result != 0) {
945 error(user_context) << "remote_set_thread_priority failed.\n";
946 return result;
947 }
948
949 return 0;
950 }
951
halide_hexagon_device_interface()952 WEAK const halide_device_interface_t *halide_hexagon_device_interface() {
953 return &hexagon_device_interface;
954 }
955
956 namespace {
halide_hexagon_cleanup()957 WEAK __attribute__((destructor)) void halide_hexagon_cleanup() {
958 halide_hexagon_device_release(NULL);
959 }
960 } // namespace
961
962 } // extern "C" linkage
963
964 namespace Halide {
965 namespace Runtime {
966 namespace Internal {
967 namespace Hexagon {
968
969 WEAK halide_device_interface_impl_t hexagon_device_interface_impl = {
970 halide_use_jit_module,
971 halide_release_jit_module,
972 halide_hexagon_device_malloc,
973 halide_hexagon_device_free,
974 halide_hexagon_device_sync,
975 halide_hexagon_device_release,
976 halide_hexagon_copy_to_host,
977 halide_hexagon_copy_to_device,
978 halide_hexagon_device_and_host_malloc,
979 halide_hexagon_device_and_host_free,
980 halide_hexagon_buffer_copy,
981 halide_hexagon_device_crop,
982 halide_hexagon_device_slice,
983 halide_hexagon_device_release_crop,
984 halide_default_device_wrap_native,
985 halide_default_device_detach_native,
986 };
987
988 WEAK halide_device_interface_t hexagon_device_interface = {
989 halide_device_malloc,
990 halide_device_free,
991 halide_device_sync,
992 halide_device_release,
993 halide_copy_to_host,
994 halide_copy_to_device,
995 halide_device_and_host_malloc,
996 halide_device_and_host_free,
997 halide_buffer_copy,
998 halide_device_crop,
999 halide_device_slice,
1000 halide_device_release_crop,
1001 halide_device_wrap_native,
1002 halide_device_detach_native,
1003 NULL,
1004 &hexagon_device_interface_impl};
1005
1006 } // namespace Hexagon
1007 } // namespace Internal
1008 } // namespace Runtime
1009 } // namespace Halide
1010