1 #include "HalideRuntimeHexagonDma.h"
2 #include "device_buffer_utils.h"
3 #include "device_interface.h"
4 #include "hexagon_dma_pool.h"
5 #include "mini_hexagon_dma.h"
6 #include "printer.h"
7 #include "runtime_internal.h"
8 #include "scoped_mutex_lock.h"
9 
10 namespace Halide {
11 namespace Runtime {
12 namespace Internal {
13 namespace HexagonDma {
14 
15 extern WEAK halide_device_interface_t hexagon_dma_device_interface;
16 
17 #define descriptor_size 64
18 
// DMA device handle structure, which holds all the necessary frame related parameters.
// To be used for DMA transfer. A pointer to one of these is stored, cast to an
// integer, in halide_buffer_t::device by the wrap_native and crop entry points.
struct dma_device_handle {
    uint8_t *buffer;      // Frame base address; set from the native handle in wrap_native
    uint16_t offset_rdx;  // X offset added to the ROI origin for reads (DDR -> L2)
    uint16_t offset_rdy;  // Y offset added to the ROI origin for reads (DDR -> L2)
    uint16_t offset_wrx;  // X offset of the ROI for writes (L2 -> DDR); accumulated by device_crop
    uint16_t offset_wry;  // Y offset of the ROI for writes (L2 -> DDR); accumulated by device_crop
    void *dma_engine;     // Engine supplied by the prepare_for_copy_* calls; 0 until then
    int frame_width;      // dim[0].extent * dim[0].stride of the wrapped buffer
    int frame_height;     // dim[1].extent of the wrapped buffer (doubled for *_UV formats on prepare)
    int frame_stride;     // dim[1].stride of the wrapped buffer
    bool is_ubwc;         // Forwarded to the DMA wrapper as bIsFmtUbwc
    bool is_write;        // true: L2 -> DDR transfer, false: DDR -> L2 transfer
    t_eDmaFmt fmt;        // DMA pixel format; eDmaFmt_RawData until prepared
};
35 
36 // Allocating memory for DMA device handle. The life time of this memory is till the frame
37 // is active in DMA process.
malloc_device_handle()38 inline dma_device_handle *malloc_device_handle() {
39     dma_device_handle *dev = (dma_device_handle *)malloc(sizeof(dma_device_handle));
40     dev->buffer = 0;
41     dev->offset_rdx = 0;
42     dev->offset_rdy = 0;
43     dev->offset_wrx = 0;
44     dev->offset_wry = 0;
45     dev->dma_engine = 0;
46     dev->frame_width = 0;
47     dev->frame_height = 0;
48     dev->frame_stride = 0;
49     dev->is_ubwc = 0;
50     dev->fmt = eDmaFmt_RawData;
51     dev->is_write = 0;
52     return dev;
53 }
54 
// Data Structure for chaining of DMA descriptors.
// Each node describes one descriptor_size-byte descriptor slot. Nodes are
// appended by desc_pool_get() and released by desc_pool_free().
typedef struct desc_pool {
    void *descriptor;         // Address of the descriptor slot handed to the DMA wrapper
    bool used;                // true while the slot is checked out via desc_pool_get()
    struct desc_pool *next;   // Next node in the singly linked pool, or NULL
} desc_pool_t;

typedef desc_pool_t *pdesc_pool;

// Head of the descriptor pool list; NULL when the pool is empty.
WEAK pdesc_pool dma_desc_pool = NULL;
// Guards every access to dma_desc_pool.
WEAK halide_mutex hexagon_desc_mutex;
66 
67 }  // namespace HexagonDma
68 }  // namespace Internal
69 }  // namespace Runtime
70 }  // namespace Halide
71 
72 using namespace Halide::Runtime::Internal::HexagonDma;
73 
74 namespace {
75 
76 // Core logic for DMA descriptor Pooling. The idea is to reuse the Allocated cache for descriptors,
77 // if it is free. In case of un availability of free descriptors, two new descriptors are allocated in the cache
78 // and make them available in the pool (128B is the minimum cache size that can be locked)
desc_pool_get(void * user_context)79 void *desc_pool_get(void *user_context) {
80     ScopedMutexLock lock(&hexagon_desc_mutex);
81     pdesc_pool temp = dma_desc_pool;
82     pdesc_pool prev = NULL;
83     // Walk the list
84     while (temp != NULL) {
85         if (!temp->used) {
86             temp->used = true;
87             return (void *)temp->descriptor;
88         }
89         prev = temp;
90         temp = temp->next;
91     }
92     // If we are still here that means temp was null.
93     // We have to allocate two descriptors here, to lock a full cache line
94     temp = (pdesc_pool)malloc(sizeof(desc_pool_t));
95     if (temp == NULL) {
96         error(user_context) << "Hexagon: Out of memory (malloc failed for DMA descriptor pool)\n";
97         return NULL;
98     }
99     uint8_t *desc = (uint8_t *)HAP_cache_lock(sizeof(char) * descriptor_size * 2, NULL);
100     if (desc == NULL) {
101         free(temp);
102         error(user_context) << "Hexagon: Out of memory (HAP_cache_lock failed for descriptor)\n";
103         return NULL;
104     }
105     temp->descriptor = (void *)desc;
106     temp->used = true;
107 
108     // Now allocate the second element in list
109     temp->next = (pdesc_pool)malloc(sizeof(desc_pool_t));
110     if (temp->next != NULL) {
111         (temp->next)->descriptor = (void *)(desc + descriptor_size);
112         (temp->next)->used = false;
113         (temp->next)->next = NULL;
114     } else {
115         // no need to throw error since we allocate two descriptor at a time
116         // but only use one
117         debug(user_context) << "Hexagon: malloc failed\n";
118     }
119 
120     if (prev != NULL) {
121         prev->next = temp;
122     } else if (dma_desc_pool == NULL) {
123         dma_desc_pool = temp;
124     }
125     return (void *)temp->descriptor;
126 }
127 
desc_pool_put(void * user_context,void * desc)128 void desc_pool_put(void *user_context, void *desc) {
129     ScopedMutexLock lock(&hexagon_desc_mutex);
130     halide_assert(user_context, desc);
131     pdesc_pool temp = dma_desc_pool;
132     while (temp != NULL) {
133         if (temp->descriptor == desc) {
134             temp->used = false;
135             return;
136         }
137         temp = temp->next;
138     }
139     error(user_context) << "Hexagon: desc not found " << desc << "\n";
140 }
141 
142 // DMA descriptor freeing logic, Two descriptors at a time will be freed.
desc_pool_free(void * user_context)143 void desc_pool_free(void *user_context) {
144     ScopedMutexLock lock(&hexagon_desc_mutex);
145     pdesc_pool temp = dma_desc_pool;
146     while (temp != NULL) {
147         pdesc_pool temp2 = temp;
148         temp = temp->next;
149         if (temp2->descriptor != NULL) {
150             HAP_cache_unlock(temp2->descriptor);
151         }
152         free(temp2);
153         temp2 = temp;
154         if (temp != NULL) {
155             temp = temp->next;
156             free(temp2);
157         }
158     }
159 
160     // Mark pool is empty, to avoid re-freeing
161     dma_desc_pool = NULL;
162 }
163 
164 // User ptovided Image format to DMA format conversion.
halide_hexagon_get_dma_format(void * user_context,const halide_hexagon_image_fmt_t format)165 inline t_eDmaFmt halide_hexagon_get_dma_format(void *user_context, const halide_hexagon_image_fmt_t format) {
166     //A giant switch case to match image formats to dma formats
167     switch (format) {
168     case halide_hexagon_fmt_NV12:
169         return eDmaFmt_NV12;
170     case halide_hexagon_fmt_NV12_Y:
171         return eDmaFmt_NV12_Y;
172     case halide_hexagon_fmt_NV12_UV:
173         return eDmaFmt_NV12_UV;
174     case halide_hexagon_fmt_P010:
175         return eDmaFmt_P010;
176     case halide_hexagon_fmt_P010_Y:
177         return eDmaFmt_P010_Y;
178     case halide_hexagon_fmt_P010_UV:
179         return eDmaFmt_P010_UV;
180     case halide_hexagon_fmt_TP10:
181         return eDmaFmt_TP10;
182     case halide_hexagon_fmt_TP10_Y:
183         return eDmaFmt_TP10_Y;
184     case halide_hexagon_fmt_TP10_UV:
185         return eDmaFmt_TP10_UV;
186     case halide_hexagon_fmt_NV124R:
187         return eDmaFmt_NV124R;
188     case halide_hexagon_fmt_NV124R_Y:
189         return eDmaFmt_NV124R_Y;
190     case halide_hexagon_fmt_NV124R_UV:
191         return eDmaFmt_NV124R_UV;
192     case halide_hexagon_fmt_RawData:
193         return eDmaFmt_RawData;
194     default:
195         error(user_context) << "Hexagon: DMA Format Mismatch " << format << "\n";
196         return eDmaFmt_MAX;
197     }
198 }
199 
200 // The core logic of DMA Transfer. This API uses the DMA device handle populated prior to calling this
201 // and does the necessary steps for performing the DMA Operation.
halide_hexagon_dma_wrapper(void * user_context,struct halide_buffer_t * src,struct halide_buffer_t * dst)202 int halide_hexagon_dma_wrapper(void *user_context, struct halide_buffer_t *src,
203                                struct halide_buffer_t *dst) {
204 
205     dma_device_handle *dev = NULL;
206     dev = (dma_device_handle *)src->device;
207 
208     debug(user_context)
209         << "Hexagon dev handle: buffer: " << dev->buffer
210         << " dev_offset(rdx: : " << dev->offset_rdx << " rdy: " << dev->offset_rdy << ")"
211         << " dev_offset(wrx: : " << dev->offset_wrx << " wry: " << dev->offset_wry << ")"
212         << " frame(w: " << dev->frame_width << " h: " << dev->frame_height << " s: " << dev->frame_stride << ")"
213         << "\n";
214 
215     debug(user_context)
216         << "size_in_bytes() src: " << static_cast<uint32>(src->size_in_bytes())
217         << " dst: " << static_cast<uint32>(dst->size_in_bytes())
218         << "\n";
219 
220     // Assert if buffer dimensions do not fulfill the format requirements
221     if (dev->fmt == eDmaFmt_RawData) {
222         halide_assert(user_context, src->dimensions <= 3);
223     }
224 
225     if ((dev->fmt == eDmaFmt_NV12_Y) ||
226         (dev->fmt == eDmaFmt_P010_Y) ||
227         (dev->fmt == eDmaFmt_TP10_Y) ||
228         (dev->fmt == eDmaFmt_NV124R_Y)) {
229         halide_assert(user_context, src->dimensions == 2);
230     }
231 
232     if ((dev->fmt == eDmaFmt_NV12_UV) ||
233         (dev->fmt == eDmaFmt_P010_UV) ||
234         (dev->fmt == eDmaFmt_TP10_UV) ||
235         (dev->fmt == eDmaFmt_NV124R_UV)) {
236         halide_assert(user_context, src->dimensions == 3);
237         halide_assert(user_context, src->dim[0].stride == 2);
238         halide_assert(user_context, src->dim[2].stride == 1);
239         halide_assert(user_context, src->dim[2].min == 0);
240         halide_assert(user_context, src->dim[2].extent == 2);
241     }
242 
243     t_StDmaWrapper_RoiAlignInfo stWalkSize = {
244         static_cast<uint16>(dst->dim[0].extent * dst->dim[0].stride),
245         static_cast<uint16>(dst->dim[1].extent)};
246     int nRet = nDmaWrapper_GetRecommendedWalkSize(dev->fmt, dev->is_ubwc, &stWalkSize);
247 
248     int roi_stride = nDmaWrapper_GetRecommendedIntermBufStride(dev->fmt, &stWalkSize, dev->is_ubwc);
249     int roi_width = stWalkSize.u16W;
250     int roi_height = stWalkSize.u16H;
251 
252     debug(user_context)
253         << "Hexagon: Recommended ROI(w: " << roi_width << " h: " << roi_height << " s: " << roi_stride << ")\n";
254 
255     // account for folding, where the dim[1].stride reflects the fold_storage stride
256     if (dst->dim[1].stride > roi_stride)
257         roi_stride = dst->dim[1].stride;
258 
259     // Assert if destination stride is a multipe of recommended stride
260     halide_assert(user_context, ((dst->dim[1].stride % roi_stride) == 0));
261 
262     // Return NULL if descriptor is not allocated
263     void *desc_addr = desc_pool_get(user_context);
264     if (desc_addr == NULL) {
265         debug(user_context) << "Hexagon: DMA descriptor allocation error\n";
266         return halide_error_code_device_buffer_copy_failed;
267     }
268 
269     int buf_size = roi_stride * roi_height * src->type.bytes();
270     debug(user_context) << "Hexagon: cache buffer size " << buf_size << "\n";
271 
272     t_StDmaWrapper_DmaTransferSetup stDmaTransferParm;
273     stDmaTransferParm.eFmt = dev->fmt;
274     stDmaTransferParm.u16FrameW = dev->frame_width;
275     stDmaTransferParm.u16FrameH = dev->frame_height;
276     stDmaTransferParm.u16FrameStride = dev->frame_stride;
277     stDmaTransferParm.u16RoiW = roi_width;
278     stDmaTransferParm.u16RoiH = roi_height;
279     stDmaTransferParm.u16RoiStride = roi_stride;
280     stDmaTransferParm.bIsFmtUbwc = dev->is_ubwc;
281     stDmaTransferParm.bUse16BitPaddingInL2 = 0;
282     stDmaTransferParm.pDescBuf = desc_addr;
283     stDmaTransferParm.pTcmDataBuf = reinterpret_cast<void *>(dst->host);
284     stDmaTransferParm.pFrameBuf = dev->buffer;
285     if (dev->is_write) {
286         stDmaTransferParm.eTransferType = eDmaWrapper_L2ToDdr;
287         stDmaTransferParm.u16RoiX = dev->offset_wrx * dst->dim[0].stride;
288         stDmaTransferParm.u16RoiY = dev->offset_wry;
289     } else {
290         stDmaTransferParm.eTransferType = eDmaWrapper_DdrToL2;
291         stDmaTransferParm.u16RoiX = (dev->offset_rdx + dst->dim[0].min) * dst->dim[0].stride;
292         stDmaTransferParm.u16RoiY = dev->offset_rdy + dst->dim[1].min;
293     }
294 
295     // Raw Format Planar
296     if ((dev->fmt == eDmaFmt_RawData) &&
297         (dst->dimensions == 3)) {
298         stDmaTransferParm.u16RoiY = dev->offset_rdy + dst->dim[1].min + (dst->dim[2].min * src->dim[1].stride);
299     }
300 
301     // DMA Driver implicitly halves the Height and Y Offset for chroma, based on Y/UV
302     // planar relation for 4:2:0 format, to adjust the for plane size difference.
303     // This driver adjustment is compensated here for Halide that treats Y/UV separately.
304     // i.e. ROI size is same for both Luma and Chroma
305     if ((dev->fmt == eDmaFmt_NV12_UV) ||
306         (dev->fmt == eDmaFmt_P010_UV) ||
307         (dev->fmt == eDmaFmt_TP10_UV) ||
308         (dev->fmt == eDmaFmt_NV124R_UV)) {
309         stDmaTransferParm.u16RoiH = roi_height * 2;
310         if (dev->is_write) {
311             stDmaTransferParm.u16RoiY = stDmaTransferParm.u16RoiY * 2;
312         } else {
313             stDmaTransferParm.u16RoiY = (stDmaTransferParm.u16RoiY - dev->frame_height) * 2;
314         }
315         debug(user_context)
316             << "Hexagon: u16Roi(X: " << stDmaTransferParm.u16RoiX << " Y: " << stDmaTransferParm.u16RoiY
317             << " W: " << stDmaTransferParm.u16RoiW << " H: " << stDmaTransferParm.u16RoiH << ")"
318             << " dst->dim[1].min: " << dst->dim[1].min << "\n";
319     }
320 
321     void *dma_engine = halide_hexagon_allocate_from_dma_pool(user_context, dev->dma_engine);
322     if (!dma_engine) {
323         debug(user_context) << "Hexagon: Dma Engine Allocation Faliure\n";
324         return halide_error_code_device_buffer_copy_failed;
325     }
326 
327     debug(user_context)
328         << "Hexagon: " << dma_engine << " transfer: " << stDmaTransferParm.pDescBuf << "\n";
329     nRet = nDmaWrapper_DmaTransferSetup(dma_engine, &stDmaTransferParm);
330     if (nRet != QURT_EOK) {
331         error(user_context) << "Hexagon: DMA Transfer Error: " << nRet << "\n";
332         return halide_error_code_device_buffer_copy_failed;
333     }
334 
335     debug(user_context) << "Hexagon: " << dma_engine << " move\n";
336     nRet = nDmaWrapper_Move(dma_engine);
337     if (nRet != QURT_EOK) {
338         error(user_context) << "Hexagon: nDmaWrapper_Move error: " << nRet << "\n";
339         return halide_error_code_device_buffer_copy_failed;
340     }
341 
342     debug(user_context) << "Hexagon: " << dma_engine << " wait\n";
343     nRet = nDmaWrapper_Wait(dma_engine);
344     if (nRet != QURT_EOK) {
345         error(user_context) << "Hexagon: nDmaWrapper_Wait error: " << nRet << "\n";
346         return halide_error_code_device_buffer_copy_failed;
347     }
348 
349     desc_pool_put(user_context, desc_addr);
350     nRet = halide_hexagon_free_to_dma_pool(user_context, dma_engine, dev->dma_engine);
351     if (nRet != halide_error_code_success) {
352         debug(user_context) << "halide_hexagon_free_from_dma_pool error:" << nRet << "\n";
353         return nRet;
354     }
355     return halide_error_code_success;
356 }
357 
358 }  // namespace
359 
360 extern "C" {
361 
halide_hexagon_dma_device_malloc(void * user_context,halide_buffer_t * buf)362 WEAK int halide_hexagon_dma_device_malloc(void *user_context, halide_buffer_t *buf) {
363     debug(user_context)
364         << "Hexagon: halide_hexagon_dma_device_malloc (user_context: " << user_context
365         << ", buf: " << *buf << ")\n";
366 
367     if (buf->device) {
368         debug(user_context) << "Hexagon: buffer already has a device. No action required\n";
369         return halide_error_code_success;
370     }
371 
372     size_t size = buf->size_in_bytes();
373     halide_assert(user_context, size != 0);
374 
375     void *mem = halide_malloc(user_context, size);
376     if (!mem) {
377         error(user_context) << "Hexagon: Out of memory (halide_malloc failed for device_malloc)\n";
378         return halide_error_code_out_of_memory;
379     }
380 
381     int err = halide_hexagon_dma_device_wrap_native(user_context, buf,
382                                                     reinterpret_cast<uint64_t>(mem));
383     if (err != 0) {
384         halide_free(user_context, mem);
385         return halide_error_code_device_malloc_failed;
386     }
387 
388     return halide_error_code_success;
389 }
390 
halide_hexagon_dma_device_free(void * user_context,halide_buffer_t * buf)391 WEAK int halide_hexagon_dma_device_free(void *user_context, halide_buffer_t *buf) {
392     debug(user_context)
393         << "Hexagon: halide_hexagon_dma_device_free (user_context: " << user_context
394         << ", buf: " << *buf << ")\n";
395 
396     dma_device_handle *dev = (dma_device_handle *)buf->device;
397     void *mem = dev->buffer;
398     halide_hexagon_dma_device_detach_native(user_context, buf);
399 
400     halide_free(user_context, mem);
401 
402     // This is to match what the default implementation of halide_device_free does.
403     buf->set_device_dirty(false);
404     return halide_error_code_success;
405 }
406 
halide_hexagon_dma_allocate_engine(void * user_context,void ** dma_engine)407 WEAK int halide_hexagon_dma_allocate_engine(void *user_context, void **dma_engine) {
408     debug(user_context)
409         << "Hexagon: halide_hexagon_dma_allocate_engine (user_context: " << user_context << ")\n";
410 
411     halide_assert(user_context, dma_engine);
412     debug(user_context) << "    dma_allocate_dma_engine -> ";
413     *dma_engine = halide_hexagon_allocate_dma_resource(user_context);
414     debug(user_context) << "        " << dma_engine << "\n";
415     if (!*dma_engine) {
416         debug(user_context) << "dma_allocate_dma_engine failed.\n";
417         return halide_error_code_generic_error;
418     }
419 
420     return halide_error_code_success;
421 }
422 
halide_hexagon_dma_deallocate_engine(void * user_context,void * dma_engine)423 WEAK int halide_hexagon_dma_deallocate_engine(void *user_context, void *dma_engine) {
424     debug(user_context)
425         << "Hexagon: halide_hexagon_dma_deallocate_engine (user_context: " << user_context
426         << ", dma_engine: " << dma_engine << ")\n";
427 
428     halide_assert(user_context, dma_engine);
429 
430     // Its safe to free descriptors here, even on 1st engine of multi-engines deallocation, since its called outside of pipeline
431     // If descriptors are needed on pipeline re-entry, the pool will also re-populate
432     desc_pool_free(user_context);
433 
434     // Free DMA Resources
435     int err = halide_hexagon_free_dma_resource(user_context, dma_engine);
436     debug(user_context) << "Hexagon:     dma_free_dma_pool done\n";
437     if (err != 0) {
438         debug(user_context) << "Hexagon: Free DMA/Cache Pool failed.\n";
439         return halide_error_code_generic_error;
440     }
441     return halide_error_code_success;
442 }
443 
444 namespace {
445 
dma_prepare_for_copy(void * user_context,struct halide_buffer_t * buf,void * dma_engine,bool is_ubwc,t_eDmaFmt fmt,bool is_write)446 inline int dma_prepare_for_copy(void *user_context, struct halide_buffer_t *buf, void *dma_engine, bool is_ubwc, t_eDmaFmt fmt, bool is_write) {
447     halide_assert(user_context, dma_engine);
448     dma_device_handle *dev = reinterpret_cast<dma_device_handle *>(buf->device);
449     dev->dma_engine = dma_engine;
450     dev->is_ubwc = is_ubwc;
451     dev->fmt = fmt;
452     dev->is_write = is_write;
453     // To compensate driver's adjustment for UV plane size
454     if ((dev->fmt == eDmaFmt_NV12_UV) ||
455         (dev->fmt == eDmaFmt_P010_UV) ||
456         (dev->fmt == eDmaFmt_TP10_UV) ||
457         (dev->fmt == eDmaFmt_NV124R_UV)) {
458         dev->frame_height = dev->frame_height * 2;
459     }
460 
461     return halide_error_code_success;
462 }
463 
464 }  // namespace
465 
halide_hexagon_dma_prepare_for_copy_to_host(void * user_context,struct halide_buffer_t * buf,void * dma_engine,bool is_ubwc,halide_hexagon_image_fmt_t fmt)466 WEAK int halide_hexagon_dma_prepare_for_copy_to_host(void *user_context, struct halide_buffer_t *buf,
467                                                      void *dma_engine, bool is_ubwc, halide_hexagon_image_fmt_t fmt) {
468     debug(user_context)
469         << "Hexagon: halide_hexagon_dma_prepare_for_copy_to_host (user_context: " << user_context
470         << ", buf: " << *buf << ", dma_engine: " << dma_engine << ")\n";
471     t_eDmaFmt format = halide_hexagon_get_dma_format(user_context, fmt);
472     return dma_prepare_for_copy(user_context, buf, dma_engine, is_ubwc, format, 0);
473 }
474 
halide_hexagon_dma_prepare_for_copy_to_device(void * user_context,struct halide_buffer_t * buf,void * dma_engine,bool is_ubwc,halide_hexagon_image_fmt_t fmt)475 WEAK int halide_hexagon_dma_prepare_for_copy_to_device(void *user_context, struct halide_buffer_t *buf,
476                                                        void *dma_engine, bool is_ubwc, halide_hexagon_image_fmt_t fmt) {
477     debug(user_context)
478         << "Hexagon: halide_hexagon_dma_prepare_for_copy_to_device (user_context: " << user_context
479         << ", buf: " << *buf << ", dma_engine: " << dma_engine << ")\n";
480     t_eDmaFmt format = halide_hexagon_get_dma_format(user_context, fmt);
481     return dma_prepare_for_copy(user_context, buf, dma_engine, is_ubwc, format, 1);
482 }
483 
halide_hexagon_dma_unprepare(void * user_context,struct halide_buffer_t * buf)484 WEAK int halide_hexagon_dma_unprepare(void *user_context, struct halide_buffer_t *buf) {
485     debug(user_context)
486         << "Hexagon: halide_hexagon_dma_unprepare (user_context: " << user_context
487         << ", buf: " << *buf << ")\n";
488     //TODO Now that FinishFrame is called by Hexagon DMA Pool Module, need to check if this function is redundant
489     return halide_error_code_success;
490 }
491 
halide_hexagon_dma_buffer_copy(void * user_context,struct halide_buffer_t * src,const struct halide_device_interface_t * dst_device_interface,struct halide_buffer_t * dst)492 WEAK int halide_hexagon_dma_buffer_copy(void *user_context, struct halide_buffer_t *src,
493                                         const struct halide_device_interface_t *dst_device_interface,
494                                         struct halide_buffer_t *dst) {
495 
496     halide_assert(user_context, dst_device_interface == NULL ||
497                                     dst_device_interface == &hexagon_dma_device_interface);
498 
499     if (src->device_dirty() &&
500         src->device_interface != &hexagon_dma_device_interface) {
501         halide_assert(user_context, dst_device_interface == &hexagon_dma_device_interface);
502         // If the source is not hexagon_dma or host memory, ask the source
503         // device interface to copy to dst host memory first.
504         debug(user_context) << "Hexagon: src->device_interface != &hexagon_dma_device_interface\n";
505         int err = src->device_interface->impl->buffer_copy(user_context, src, NULL, dst);
506         if (err) {
507             error(user_context) << "Hexagon: halide_hexagon_dma_buffer_copy (not DMA) failed: " << err << "\n";
508             return err;
509         }
510         // Now just copy from src to host
511         src = dst;
512     }
513 
514     bool from_host = !src->device_dirty() && src->host != NULL;
515     bool to_host = !dst_device_interface;
516 
517     halide_assert(user_context, from_host || src->device);
518     halide_assert(user_context, to_host || dst->device);
519 
520     halide_assert(user_context, (!from_host && to_host) || (from_host && !to_host));
521 
522     debug(user_context)
523         << "Hexagon: halide_hexagon_dma_buffer_copy (user_context: " << user_context
524         << ", src: " << src << ", dst: " << dst << "\n"
525         << ", DMA Read: " << to_host << ", DMA Write: " << from_host << ")\n";
526 
527     int nRet;
528     if (dst_device_interface == &hexagon_dma_device_interface) {
529         nRet = halide_hexagon_dma_wrapper(user_context, dst, src);
530     } else {
531         nRet = halide_hexagon_dma_wrapper(user_context, src, dst);
532     }
533 
534     return nRet;
535 }
536 
halide_hexagon_dma_copy_to_device(void * user_context,halide_buffer_t * buf)537 WEAK int halide_hexagon_dma_copy_to_device(void *user_context, halide_buffer_t *buf) {
538     debug(user_context)
539         << "Hexagon: halide_hexagon_dma_copy_to_device (user_context: " << user_context
540         << ", buf: " << *buf << ")\n";
541 
542     error(user_context) << "Hexagon: halide_hexagon_dma_copy_to_device not implemented\n";
543     return halide_error_code_copy_to_device_failed;
544 }
545 
halide_hexagon_dma_copy_to_host(void * user_context,struct halide_buffer_t * buf)546 WEAK int halide_hexagon_dma_copy_to_host(void *user_context, struct halide_buffer_t *buf) {
547     debug(user_context)
548         << "Hexagon: halide_hexagon_dma_copy_to_host (user_context: " << user_context
549         << ", buf: " << *buf << ")\n";
550 
551     error(user_context) << "Hexagon: halide_hexagon_dma_copy_to_host not implemented\n";
552     return halide_error_code_copy_to_device_failed;
553 }
554 
halide_hexagon_dma_device_crop(void * user_context,const struct halide_buffer_t * src,struct halide_buffer_t * dst)555 WEAK int halide_hexagon_dma_device_crop(void *user_context,
556                                         const struct halide_buffer_t *src,
557                                         struct halide_buffer_t *dst) {
558     debug(user_context)
559         << "Hexagon: halide_hexagon_dma_device_crop (user_context: " << user_context
560         << " src: " << *src << " dst: " << *dst << ")\n";
561 
562     dst->device_interface = src->device_interface;
563 
564     const dma_device_handle *src_dev = (dma_device_handle *)src->device;
565     dma_device_handle *dst_dev = malloc_device_handle();
566     halide_assert(user_context, dst_dev);
567     dst_dev->buffer = src_dev->buffer;
568     dst_dev->offset_wrx = src_dev->offset_wrx + dst->dim[0].min - src->dim[0].min;
569     dst_dev->offset_wry = src_dev->offset_wry + dst->dim[1].min - src->dim[1].min;
570     dst_dev->dma_engine = src_dev->dma_engine;
571     dst_dev->frame_width = src_dev->frame_width;
572     dst_dev->frame_height = src_dev->frame_height;
573     dst_dev->frame_stride = src_dev->frame_stride;
574     dst_dev->is_ubwc = src_dev->is_ubwc;
575     dst_dev->is_write = src_dev->is_write;
576     dst_dev->fmt = src_dev->fmt;
577 
578     dst->device = reinterpret_cast<uint64_t>(dst_dev);
579 
580     return halide_error_code_success;
581 }
582 
halide_hexagon_dma_device_slice(void * user_context,const struct halide_buffer_t * src,int slice_dim,int slice_pos,struct halide_buffer_t * dst)583 WEAK int halide_hexagon_dma_device_slice(void *user_context,
584                                          const struct halide_buffer_t *src,
585                                          int slice_dim, int slice_pos, struct halide_buffer_t *dst) {
586     debug(user_context)
587         << "Hexagon: halide_hexagon_dma_device_slice (user_context: " << user_context
588         << " src: " << *src << " dst: " << *dst << ")\n";
589 
590     halide_assert(user_context, 0);
591 
592     error(user_context) << "Hexagon: halide_hexagon_dma_device_slice not implemented\n";
593     return halide_error_code_generic_error;
594 }
595 
halide_hexagon_dma_device_release_crop(void * user_context,struct halide_buffer_t * buf)596 WEAK int halide_hexagon_dma_device_release_crop(void *user_context, struct halide_buffer_t *buf) {
597     debug(user_context)
598         << "Hexagon: halide_hexagon_dma_device_release_crop (user_context: " << user_context
599         << " buf: " << *buf << ")\n";
600 
601     halide_assert(user_context, buf->device);
602     free((dma_device_handle *)buf->device);
603     buf->device = 0;
604 
605     return halide_error_code_success;
606 }
607 
halide_hexagon_dma_device_sync(void * user_context,struct halide_buffer_t * buf)608 WEAK int halide_hexagon_dma_device_sync(void *user_context, struct halide_buffer_t *buf) {
609     debug(user_context)
610         << "Hexagon: halide_hexagon_dma_device_sync (user_context: " << user_context
611         << " buf: " << *buf << ")\n";
612     // TODO We need to check if any DMA specific action is required here
613     return halide_error_code_success;
614 }
615 
halide_hexagon_dma_device_wrap_native(void * user_context,struct halide_buffer_t * buf,uint64_t handle)616 WEAK int halide_hexagon_dma_device_wrap_native(void *user_context, struct halide_buffer_t *buf,
617                                                uint64_t handle) {
618     debug(user_context)
619         << "Hexagon: halide_hexagon_dma_device_wrap_native (user_context: " << user_context
620         << " buf: " << *buf << " handle: " << handle << ")\n";
621 
622     halide_assert(user_context, buf->device == 0);
623     if (buf->device != 0) {
624         error(user_context) << "Hexagon: halide_hexagon_dma_device_wrap_native buffer already has a device\n";
625         return halide_error_code_device_wrap_native_failed;
626     }
627 
628     buf->device_interface = &hexagon_dma_device_interface;
629     buf->device_interface->impl->use_module();
630 
631     dma_device_handle *dev = malloc_device_handle();
632     halide_assert(user_context, dev);
633     dev->buffer = reinterpret_cast<uint8_t *>(handle);
634     dev->dma_engine = 0;
635     dev->frame_width = buf->dim[0].extent * buf->dim[0].stride;
636     dev->frame_height = buf->dim[1].extent;
637     dev->frame_stride = buf->dim[1].stride;
638     buf->device = reinterpret_cast<uint64_t>(dev);
639 
640     return halide_error_code_success;
641 }
642 
halide_hexagon_dma_device_detach_native(void * user_context,struct halide_buffer_t * buf)643 WEAK int halide_hexagon_dma_device_detach_native(void *user_context, struct halide_buffer_t *buf) {
644     debug(user_context)
645         << "Hexagon: halide_hexagon_dma_device_detach_native (user_context: " << user_context
646         << " buf: " << *buf << ")\n";
647 
648     if (buf->device == 0) {
649         error(user_context) << "Hexagon: halide_hexagon_dma_device_detach_native buffer without a device\n";
650         return halide_error_code_device_detach_native_failed;
651     }
652     halide_assert(user_context, buf->device_interface == &hexagon_dma_device_interface);
653     dma_device_handle *dev = (dma_device_handle *)buf->device;
654     free(dev);
655     buf->device_interface->impl->release_module();
656     buf->device = 0;
657     buf->device_interface = NULL;
658 
659     return halide_error_code_success;
660 }
661 
halide_hexagon_dma_device_and_host_malloc(void * user_context,struct halide_buffer_t * buf)662 WEAK int halide_hexagon_dma_device_and_host_malloc(void *user_context, struct halide_buffer_t *buf) {
663     debug(user_context)
664         << "Hexagon: halide_hexagon_dma_device_and_host_malloc (user_context: " << user_context
665         << " buf: " << *buf << ")\n";
666 
667     return halide_default_device_and_host_malloc(user_context, buf, &hexagon_dma_device_interface);
668 }
669 
halide_hexagon_dma_device_and_host_free(void * user_context,struct halide_buffer_t * buf)670 WEAK int halide_hexagon_dma_device_and_host_free(void *user_context, struct halide_buffer_t *buf) {
671     debug(user_context)
672         << "Hexagon: halide_hexagon_dma_device_and_host_free (user_context: " << user_context
673         << " buf: " << *buf << ")\n";
674 
675     return halide_default_device_and_host_free(user_context, buf, &hexagon_dma_device_interface);
676 }
677 
halide_hexagon_dma_device_interface()678 WEAK const halide_device_interface_t *halide_hexagon_dma_device_interface() {
679     return &hexagon_dma_device_interface;
680 }
681 
halide_hexagon_dma_device_release(void * user_context)682 WEAK int halide_hexagon_dma_device_release(void *user_context) {
683     debug(user_context)
684         << "Hexagon: halide_hexagon_dma_device_release (user_context: " << user_context << ")\n";
685 
686     return 0;
687 }
688 
halide_hexagon_dma_power_mode_voting(void * user_context,halide_hexagon_power_mode_t cornercase)689 WEAK int halide_hexagon_dma_power_mode_voting(void *user_context, halide_hexagon_power_mode_t cornercase) {
690     debug(user_context)
691         << "Hexagon: halide_hexagon_dma_power_voting (user_context: " << user_context << ")\n";
692     switch (cornercase) {
693     case halide_hexagon_power_low_2:
694         return nDmaWrapper_PowerVoting(PW_SVS2);
695     case halide_hexagon_power_low:
696         return nDmaWrapper_PowerVoting(PW_SVS);
697     case halide_hexagon_power_low_plus:
698         return nDmaWrapper_PowerVoting(PW_SVS_L1);
699     case halide_hexagon_power_nominal:
700         return nDmaWrapper_PowerVoting(PW_NORMAL);
701     case halide_hexagon_power_nominal_plus:
702         return nDmaWrapper_PowerVoting(PW_NORMAL_L1);
703     case halide_hexagon_power_turbo:
704         return nDmaWrapper_PowerVoting(PW_TURBO);
705     case halide_hexagon_power_default:
706         return nDmaWrapper_PowerVoting(~PW_SVS);
707     default:
708         error(user_context) << "Hexagon: halide_hexagon_dma_power_voting power mode (" << cornercase << ") not found\n";
709         return halide_error_code_generic_error;
710     }
711 }
712 
713 }  // extern "C" linkage
714 
715 namespace Halide {
716 namespace Runtime {
717 namespace Internal {
718 namespace HexagonDma {
719 
// Backend implementation table for the Hexagon DMA device interface. The
// entries are positional: their order must match the member order of
// halide_device_interface_impl_t. The first two entries are the generic JIT
// module use/release hooks; the rest are the halide_hexagon_dma_* entry points
// defined in this file.
WEAK halide_device_interface_impl_t hexagon_dma_device_interface_impl = {
    halide_use_jit_module,
    halide_release_jit_module,
    halide_hexagon_dma_device_malloc,
    halide_hexagon_dma_device_free,
    halide_hexagon_dma_device_sync,
    halide_hexagon_dma_device_release,
    halide_hexagon_dma_copy_to_host,
    halide_hexagon_dma_copy_to_device,
    halide_hexagon_dma_device_and_host_malloc,
    halide_hexagon_dma_device_and_host_free,
    halide_hexagon_dma_buffer_copy,
    halide_hexagon_dma_device_crop,
    halide_hexagon_dma_device_slice,
    halide_hexagon_dma_device_release_crop,
    halide_hexagon_dma_device_wrap_native,
    halide_hexagon_dma_device_detach_native,
};
738 
// Public device interface for Hexagon DMA. The entries are positional and must
// match the member order of halide_device_interface_t. The generic halide_*
// entry points are listed first; the final entry links to the backend table
// hexagon_dma_device_interface_impl.
WEAK halide_device_interface_t hexagon_dma_device_interface = {
    halide_device_malloc,
    halide_device_free,
    halide_device_sync,
    halide_device_release,
    halide_copy_to_host,
    halide_copy_to_device,
    halide_device_and_host_malloc,
    halide_device_and_host_free,
    halide_buffer_copy,
    halide_device_crop,
    halide_device_slice,
    halide_device_release_crop,
    halide_device_wrap_native,
    halide_device_detach_native,
    // NOTE(review): presumably the optional compute_capability hook, unused by
    // this backend - confirm against halide_device_interface_t.
    NULL,
    &hexagon_dma_device_interface_impl};
756 
757 }  // namespace HexagonDma
758 }  // namespace Internal
759 }  // namespace Runtime
760 }  // namespace Halide
761