#include "HalideRuntimeQurt.h"
#include "mini_qurt.h"
#include "printer.h"
#include "runtime_internal.h"

using namespace Halide::Runtime::Internal::Qurt;

extern "C" {

// Lock HVX vector units for the calling thread, in either 64-byte or
// 128-byte vector mode.
WEAK int halide_qurt_hvx_lock(void *user_context, int size) {
    qurt_hvx_mode_t mode;
    switch (size) {
    case 64:
        mode = QURT_HVX_MODE_64B;
        break;
    case 128:
        mode = QURT_HVX_MODE_128B;
        break;
    default:
        error(user_context) << "HVX lock size must be 64 or 128.\n";
        return -1;
    }

    debug(user_context) << "QuRT: qurt_hvx_lock(" << mode << ") ->\n";
    int result = qurt_hvx_lock(mode);
    debug(user_context) << "        " << result << "\n";
    if (result != QURT_EOK) {
        error(user_context) << "qurt_hvx_lock failed\n";
        return -1;
    }
    return 0;
}

// Release the HVX vector context previously acquired with halide_qurt_hvx_lock.
WEAK int halide_qurt_hvx_unlock(void *user_context) {
    debug(user_context) << "QuRT: qurt_hvx_unlock ->\n";
    int result = qurt_hvx_unlock();
    debug(user_context) << "        " << result << "\n";
    if (result != QURT_EOK) {
        error(user_context) << "qurt_hvx_unlock failed\n";
        return -1;
    }

    return 0;
}

// Wrapper matching the void(void *user_context, void *object) shape of a
// Halide runtime destructor, so the unlock can be run automatically.
WEAK void halide_qurt_hvx_unlock_as_destructor(void *user_context, void * /*obj*/) {
    halide_qurt_hvx_unlock(user_context);
}
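
// Illustrative sketch (comment only, not compiled): how pipeline code is
// expected to pair these entry points. The control flow below is an
// assumption for illustration, not taken from this runtime.
//
//     if (halide_qurt_hvx_lock(user_context, 128) != 0) {
//         return -1;  // lock failed; the error has already been reported
//     }
//     // ... HVX vector work ...
//     halide_qurt_hvx_unlock(user_context);
//
// Alternatively, halide_qurt_hvx_unlock_as_destructor can be registered as a
// destructor so the unlock also runs on early-exit paths.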

// These need to be inlined; otherwise the extern call with the ptr
// parameter breaks a lot of optimizations.
WEAK_INLINE int _halide_prefetch_2d(const void *ptr, int width_bytes, int height, int stride_bytes) {
    // Notes:
    //  - Prefetches can be queued up to 3 deep (MAX_PREFETCH)
    //  - If 3 are already pending, the oldest request is dropped
    //  - USR:PFA status bit is set to indicate that prefetches are in progress
    //  - An l2fetch with any subfield set to zero cancels all pending prefetches
    //  - The l2fetch starting address must be in mapped memory but the range
    //    prefetched can go into unmapped memory without raising an exception
    const int dir = 1;
    uint64_t desc =
        (static_cast<uint64_t>(dir) << 48) |
        (static_cast<uint64_t>(stride_bytes) << 32) |
        (static_cast<uint64_t>(width_bytes) << 16) |
        (static_cast<uint64_t>(height) << 0);
    __asm__ __volatile__("l2fetch(%0,%1)"
                         :
                         : "r"(ptr), "r"(desc));
    return 0;
}

WEAK_INLINE int _halide_prefetch(const void *ptr, int size) {
    _halide_prefetch_2d(ptr, size, 1, 1);
    return 0;
}
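
// Illustrative sketch (comment only, not compiled): prefetching a 64-row by
// 128-byte tile of a row-major buffer whose rows are 512 bytes apart. The
// pointer and tile dimensions are assumptions for illustration.
//
//     const uint8_t *tile = ...;  // must point into mapped memory
//     _halide_prefetch_2d(tile, /*width_bytes=*/128, /*height=*/64,
//                         /*stride_bytes=*/512);
//
// A single l2fetch is issued, with direction, stride, width, and height packed
// into the 64-bit descriptor built above.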

// Buffer argument as seen by Hexagon code: a device handle plus a host
// pointer, mirroring the corresponding halide_buffer_t fields.
struct hexagon_buffer_t_arg {
    uint64_t device;
    uint8_t *host;
};

WEAK_INLINE uint8_t *_halide_hexagon_buffer_get_host(const hexagon_buffer_t_arg *buf) {
    return buf->host;
}

WEAK_INLINE uint64_t _halide_hexagon_buffer_get_device(const hexagon_buffer_t_arg *buf) {
    return buf->device;
}
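
// Illustrative sketch (comment only, not compiled): unpacking a buffer
// argument through these inline accessors. The variable names and values are
// assumptions for illustration.
//
//     hexagon_buffer_t_arg arg = {/*device=*/0, /*host=*/nullptr};
//     uint8_t *host = _halide_hexagon_buffer_get_host(&arg);
//     uint64_t device = _halide_hexagon_buffer_get_device(&arg);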
}  // extern "C"