1 #include "HalideRuntimeHexagonDma.h"
2 #include "device_buffer_utils.h"
3 #include "device_interface.h"
4 #include "hexagon_dma_pool.h"
5 #include "mini_hexagon_dma.h"
6 #include "printer.h"
7 #include "runtime_internal.h"
8 #include "scoped_mutex_lock.h"
9
10 namespace Halide {
11 namespace Runtime {
12 namespace Internal {
13 namespace HexagonDma {
14
15 extern WEAK halide_device_interface_t hexagon_dma_device_interface;
16
17 #define descriptor_size 64
18
// DMA device handle structure, which holds all the necessary frame related parameters.
// To be used for DMA transfer. One handle is attached to each wrapped buffer
// (see halide_hexagon_dma_device_wrap_native) and crops get their own copy.
struct dma_device_handle {
    uint8_t *buffer;      // frame base address in DDR (the wrapped native handle)
    uint16_t offset_rdx;  // x offset applied on DMA reads (DDR -> L2)
    uint16_t offset_rdy;  // y offset applied on DMA reads
    uint16_t offset_wrx;  // x offset applied on DMA writes (L2 -> DDR); rebased by device_crop
    uint16_t offset_wry;  // y offset applied on DMA writes; rebased by device_crop
    void *dma_engine;     // engine handle assigned by prepare_for_copy_to_host/device
    int frame_width;      // dim[0].extent * dim[0].stride of the wrapped buffer
    int frame_height;     // dim[1].extent (doubled for 4:2:0 chroma planes in dma_prepare_for_copy)
    int frame_stride;     // dim[1].stride of the wrapped buffer
    bool is_ubwc;         // true if the frame is UBWC-compressed
    bool is_write;        // true for L2->DDR transfers, false for DDR->L2
    t_eDmaFmt fmt;        // DMA image format of the frame
};
35
36 // Allocating memory for DMA device handle. The life time of this memory is till the frame
37 // is active in DMA process.
malloc_device_handle()38 inline dma_device_handle *malloc_device_handle() {
39 dma_device_handle *dev = (dma_device_handle *)malloc(sizeof(dma_device_handle));
40 dev->buffer = 0;
41 dev->offset_rdx = 0;
42 dev->offset_rdy = 0;
43 dev->offset_wrx = 0;
44 dev->offset_wry = 0;
45 dev->dma_engine = 0;
46 dev->frame_width = 0;
47 dev->frame_height = 0;
48 dev->frame_stride = 0;
49 dev->is_ubwc = 0;
50 dev->fmt = eDmaFmt_RawData;
51 dev->is_write = 0;
52 return dev;
53 }
54
// Data structure for chaining DMA descriptors into a reusable pool.
typedef struct desc_pool {
    void *descriptor;        // one 64B descriptor, carved from a locked cache line
    bool used;               // true while handed out by desc_pool_get
    struct desc_pool *next;  // next pool entry (singly linked list)
} desc_pool_t;

typedef desc_pool_t *pdesc_pool;

// Head of the process-wide descriptor pool; guarded by hexagon_desc_mutex.
WEAK pdesc_pool dma_desc_pool = NULL;
// Protects all traversal and mutation of dma_desc_pool.
WEAK halide_mutex hexagon_desc_mutex;
66
67 } // namespace HexagonDma
68 } // namespace Internal
69 } // namespace Runtime
70 } // namespace Halide
71
72 using namespace Halide::Runtime::Internal::HexagonDma;
73
74 namespace {
75
76 // Core logic for DMA descriptor Pooling. The idea is to reuse the Allocated cache for descriptors,
77 // if it is free. In case of un availability of free descriptors, two new descriptors are allocated in the cache
78 // and make them available in the pool (128B is the minimum cache size that can be locked)
desc_pool_get(void * user_context)79 void *desc_pool_get(void *user_context) {
80 ScopedMutexLock lock(&hexagon_desc_mutex);
81 pdesc_pool temp = dma_desc_pool;
82 pdesc_pool prev = NULL;
83 // Walk the list
84 while (temp != NULL) {
85 if (!temp->used) {
86 temp->used = true;
87 return (void *)temp->descriptor;
88 }
89 prev = temp;
90 temp = temp->next;
91 }
92 // If we are still here that means temp was null.
93 // We have to allocate two descriptors here, to lock a full cache line
94 temp = (pdesc_pool)malloc(sizeof(desc_pool_t));
95 if (temp == NULL) {
96 error(user_context) << "Hexagon: Out of memory (malloc failed for DMA descriptor pool)\n";
97 return NULL;
98 }
99 uint8_t *desc = (uint8_t *)HAP_cache_lock(sizeof(char) * descriptor_size * 2, NULL);
100 if (desc == NULL) {
101 free(temp);
102 error(user_context) << "Hexagon: Out of memory (HAP_cache_lock failed for descriptor)\n";
103 return NULL;
104 }
105 temp->descriptor = (void *)desc;
106 temp->used = true;
107
108 // Now allocate the second element in list
109 temp->next = (pdesc_pool)malloc(sizeof(desc_pool_t));
110 if (temp->next != NULL) {
111 (temp->next)->descriptor = (void *)(desc + descriptor_size);
112 (temp->next)->used = false;
113 (temp->next)->next = NULL;
114 } else {
115 // no need to throw error since we allocate two descriptor at a time
116 // but only use one
117 debug(user_context) << "Hexagon: malloc failed\n";
118 }
119
120 if (prev != NULL) {
121 prev->next = temp;
122 } else if (dma_desc_pool == NULL) {
123 dma_desc_pool = temp;
124 }
125 return (void *)temp->descriptor;
126 }
127
desc_pool_put(void * user_context,void * desc)128 void desc_pool_put(void *user_context, void *desc) {
129 ScopedMutexLock lock(&hexagon_desc_mutex);
130 halide_assert(user_context, desc);
131 pdesc_pool temp = dma_desc_pool;
132 while (temp != NULL) {
133 if (temp->descriptor == desc) {
134 temp->used = false;
135 return;
136 }
137 temp = temp->next;
138 }
139 error(user_context) << "Hexagon: desc not found " << desc << "\n";
140 }
141
// DMA descriptor freeing logic; descriptors are freed two at a time, because
// each HAP_cache_lock in desc_pool_get produced ONE locked cache line shared
// by a PAIR of descriptors — only the first node of each pair carries an
// address that is valid to pass to HAP_cache_unlock.
// NOTE(review): this assumes the list is strictly paired; if the spare-node
// malloc in desc_pool_get ever failed, the pairing below would misalign and
// HAP_cache_unlock could be called on a mid-line address — TODO confirm that
// case cannot occur in practice.
void desc_pool_free(void *user_context) {
    ScopedMutexLock lock(&hexagon_desc_mutex);
    pdesc_pool temp = dma_desc_pool;
    while (temp != NULL) {
        pdesc_pool temp2 = temp;
        temp = temp->next;
        if (temp2->descriptor != NULL) {
            // First node of the pair: unlock the whole 128B cache line.
            HAP_cache_unlock(temp2->descriptor);
        }
        free(temp2);
        temp2 = temp;
        if (temp != NULL) {
            // Second node of the pair shares the line unlocked above.
            temp = temp->next;
            free(temp2);
        }
    }

    // Mark pool is empty, to avoid re-freeing
    dma_desc_pool = NULL;
}
163
164 // User ptovided Image format to DMA format conversion.
halide_hexagon_get_dma_format(void * user_context,const halide_hexagon_image_fmt_t format)165 inline t_eDmaFmt halide_hexagon_get_dma_format(void *user_context, const halide_hexagon_image_fmt_t format) {
166 //A giant switch case to match image formats to dma formats
167 switch (format) {
168 case halide_hexagon_fmt_NV12:
169 return eDmaFmt_NV12;
170 case halide_hexagon_fmt_NV12_Y:
171 return eDmaFmt_NV12_Y;
172 case halide_hexagon_fmt_NV12_UV:
173 return eDmaFmt_NV12_UV;
174 case halide_hexagon_fmt_P010:
175 return eDmaFmt_P010;
176 case halide_hexagon_fmt_P010_Y:
177 return eDmaFmt_P010_Y;
178 case halide_hexagon_fmt_P010_UV:
179 return eDmaFmt_P010_UV;
180 case halide_hexagon_fmt_TP10:
181 return eDmaFmt_TP10;
182 case halide_hexagon_fmt_TP10_Y:
183 return eDmaFmt_TP10_Y;
184 case halide_hexagon_fmt_TP10_UV:
185 return eDmaFmt_TP10_UV;
186 case halide_hexagon_fmt_NV124R:
187 return eDmaFmt_NV124R;
188 case halide_hexagon_fmt_NV124R_Y:
189 return eDmaFmt_NV124R_Y;
190 case halide_hexagon_fmt_NV124R_UV:
191 return eDmaFmt_NV124R_UV;
192 case halide_hexagon_fmt_RawData:
193 return eDmaFmt_RawData;
194 default:
195 error(user_context) << "Hexagon: DMA Format Mismatch " << format << "\n";
196 return eDmaFmt_MAX;
197 }
198 }
199
200 // The core logic of DMA Transfer. This API uses the DMA device handle populated prior to calling this
201 // and does the necessary steps for performing the DMA Operation.
halide_hexagon_dma_wrapper(void * user_context,struct halide_buffer_t * src,struct halide_buffer_t * dst)202 int halide_hexagon_dma_wrapper(void *user_context, struct halide_buffer_t *src,
203 struct halide_buffer_t *dst) {
204
205 dma_device_handle *dev = NULL;
206 dev = (dma_device_handle *)src->device;
207
208 debug(user_context)
209 << "Hexagon dev handle: buffer: " << dev->buffer
210 << " dev_offset(rdx: : " << dev->offset_rdx << " rdy: " << dev->offset_rdy << ")"
211 << " dev_offset(wrx: : " << dev->offset_wrx << " wry: " << dev->offset_wry << ")"
212 << " frame(w: " << dev->frame_width << " h: " << dev->frame_height << " s: " << dev->frame_stride << ")"
213 << "\n";
214
215 debug(user_context)
216 << "size_in_bytes() src: " << static_cast<uint32>(src->size_in_bytes())
217 << " dst: " << static_cast<uint32>(dst->size_in_bytes())
218 << "\n";
219
220 // Assert if buffer dimensions do not fulfill the format requirements
221 if (dev->fmt == eDmaFmt_RawData) {
222 halide_assert(user_context, src->dimensions <= 3);
223 }
224
225 if ((dev->fmt == eDmaFmt_NV12_Y) ||
226 (dev->fmt == eDmaFmt_P010_Y) ||
227 (dev->fmt == eDmaFmt_TP10_Y) ||
228 (dev->fmt == eDmaFmt_NV124R_Y)) {
229 halide_assert(user_context, src->dimensions == 2);
230 }
231
232 if ((dev->fmt == eDmaFmt_NV12_UV) ||
233 (dev->fmt == eDmaFmt_P010_UV) ||
234 (dev->fmt == eDmaFmt_TP10_UV) ||
235 (dev->fmt == eDmaFmt_NV124R_UV)) {
236 halide_assert(user_context, src->dimensions == 3);
237 halide_assert(user_context, src->dim[0].stride == 2);
238 halide_assert(user_context, src->dim[2].stride == 1);
239 halide_assert(user_context, src->dim[2].min == 0);
240 halide_assert(user_context, src->dim[2].extent == 2);
241 }
242
243 t_StDmaWrapper_RoiAlignInfo stWalkSize = {
244 static_cast<uint16>(dst->dim[0].extent * dst->dim[0].stride),
245 static_cast<uint16>(dst->dim[1].extent)};
246 int nRet = nDmaWrapper_GetRecommendedWalkSize(dev->fmt, dev->is_ubwc, &stWalkSize);
247
248 int roi_stride = nDmaWrapper_GetRecommendedIntermBufStride(dev->fmt, &stWalkSize, dev->is_ubwc);
249 int roi_width = stWalkSize.u16W;
250 int roi_height = stWalkSize.u16H;
251
252 debug(user_context)
253 << "Hexagon: Recommended ROI(w: " << roi_width << " h: " << roi_height << " s: " << roi_stride << ")\n";
254
255 // account for folding, where the dim[1].stride reflects the fold_storage stride
256 if (dst->dim[1].stride > roi_stride)
257 roi_stride = dst->dim[1].stride;
258
259 // Assert if destination stride is a multipe of recommended stride
260 halide_assert(user_context, ((dst->dim[1].stride % roi_stride) == 0));
261
262 // Return NULL if descriptor is not allocated
263 void *desc_addr = desc_pool_get(user_context);
264 if (desc_addr == NULL) {
265 debug(user_context) << "Hexagon: DMA descriptor allocation error\n";
266 return halide_error_code_device_buffer_copy_failed;
267 }
268
269 int buf_size = roi_stride * roi_height * src->type.bytes();
270 debug(user_context) << "Hexagon: cache buffer size " << buf_size << "\n";
271
272 t_StDmaWrapper_DmaTransferSetup stDmaTransferParm;
273 stDmaTransferParm.eFmt = dev->fmt;
274 stDmaTransferParm.u16FrameW = dev->frame_width;
275 stDmaTransferParm.u16FrameH = dev->frame_height;
276 stDmaTransferParm.u16FrameStride = dev->frame_stride;
277 stDmaTransferParm.u16RoiW = roi_width;
278 stDmaTransferParm.u16RoiH = roi_height;
279 stDmaTransferParm.u16RoiStride = roi_stride;
280 stDmaTransferParm.bIsFmtUbwc = dev->is_ubwc;
281 stDmaTransferParm.bUse16BitPaddingInL2 = 0;
282 stDmaTransferParm.pDescBuf = desc_addr;
283 stDmaTransferParm.pTcmDataBuf = reinterpret_cast<void *>(dst->host);
284 stDmaTransferParm.pFrameBuf = dev->buffer;
285 if (dev->is_write) {
286 stDmaTransferParm.eTransferType = eDmaWrapper_L2ToDdr;
287 stDmaTransferParm.u16RoiX = dev->offset_wrx * dst->dim[0].stride;
288 stDmaTransferParm.u16RoiY = dev->offset_wry;
289 } else {
290 stDmaTransferParm.eTransferType = eDmaWrapper_DdrToL2;
291 stDmaTransferParm.u16RoiX = (dev->offset_rdx + dst->dim[0].min) * dst->dim[0].stride;
292 stDmaTransferParm.u16RoiY = dev->offset_rdy + dst->dim[1].min;
293 }
294
295 // Raw Format Planar
296 if ((dev->fmt == eDmaFmt_RawData) &&
297 (dst->dimensions == 3)) {
298 stDmaTransferParm.u16RoiY = dev->offset_rdy + dst->dim[1].min + (dst->dim[2].min * src->dim[1].stride);
299 }
300
301 // DMA Driver implicitly halves the Height and Y Offset for chroma, based on Y/UV
302 // planar relation for 4:2:0 format, to adjust the for plane size difference.
303 // This driver adjustment is compensated here for Halide that treats Y/UV separately.
304 // i.e. ROI size is same for both Luma and Chroma
305 if ((dev->fmt == eDmaFmt_NV12_UV) ||
306 (dev->fmt == eDmaFmt_P010_UV) ||
307 (dev->fmt == eDmaFmt_TP10_UV) ||
308 (dev->fmt == eDmaFmt_NV124R_UV)) {
309 stDmaTransferParm.u16RoiH = roi_height * 2;
310 if (dev->is_write) {
311 stDmaTransferParm.u16RoiY = stDmaTransferParm.u16RoiY * 2;
312 } else {
313 stDmaTransferParm.u16RoiY = (stDmaTransferParm.u16RoiY - dev->frame_height) * 2;
314 }
315 debug(user_context)
316 << "Hexagon: u16Roi(X: " << stDmaTransferParm.u16RoiX << " Y: " << stDmaTransferParm.u16RoiY
317 << " W: " << stDmaTransferParm.u16RoiW << " H: " << stDmaTransferParm.u16RoiH << ")"
318 << " dst->dim[1].min: " << dst->dim[1].min << "\n";
319 }
320
321 void *dma_engine = halide_hexagon_allocate_from_dma_pool(user_context, dev->dma_engine);
322 if (!dma_engine) {
323 debug(user_context) << "Hexagon: Dma Engine Allocation Faliure\n";
324 return halide_error_code_device_buffer_copy_failed;
325 }
326
327 debug(user_context)
328 << "Hexagon: " << dma_engine << " transfer: " << stDmaTransferParm.pDescBuf << "\n";
329 nRet = nDmaWrapper_DmaTransferSetup(dma_engine, &stDmaTransferParm);
330 if (nRet != QURT_EOK) {
331 error(user_context) << "Hexagon: DMA Transfer Error: " << nRet << "\n";
332 return halide_error_code_device_buffer_copy_failed;
333 }
334
335 debug(user_context) << "Hexagon: " << dma_engine << " move\n";
336 nRet = nDmaWrapper_Move(dma_engine);
337 if (nRet != QURT_EOK) {
338 error(user_context) << "Hexagon: nDmaWrapper_Move error: " << nRet << "\n";
339 return halide_error_code_device_buffer_copy_failed;
340 }
341
342 debug(user_context) << "Hexagon: " << dma_engine << " wait\n";
343 nRet = nDmaWrapper_Wait(dma_engine);
344 if (nRet != QURT_EOK) {
345 error(user_context) << "Hexagon: nDmaWrapper_Wait error: " << nRet << "\n";
346 return halide_error_code_device_buffer_copy_failed;
347 }
348
349 desc_pool_put(user_context, desc_addr);
350 nRet = halide_hexagon_free_to_dma_pool(user_context, dma_engine, dev->dma_engine);
351 if (nRet != halide_error_code_success) {
352 debug(user_context) << "halide_hexagon_free_from_dma_pool error:" << nRet << "\n";
353 return nRet;
354 }
355 return halide_error_code_success;
356 }
357
358 } // namespace
359
360 extern "C" {
361
halide_hexagon_dma_device_malloc(void * user_context,halide_buffer_t * buf)362 WEAK int halide_hexagon_dma_device_malloc(void *user_context, halide_buffer_t *buf) {
363 debug(user_context)
364 << "Hexagon: halide_hexagon_dma_device_malloc (user_context: " << user_context
365 << ", buf: " << *buf << ")\n";
366
367 if (buf->device) {
368 debug(user_context) << "Hexagon: buffer already has a device. No action required\n";
369 return halide_error_code_success;
370 }
371
372 size_t size = buf->size_in_bytes();
373 halide_assert(user_context, size != 0);
374
375 void *mem = halide_malloc(user_context, size);
376 if (!mem) {
377 error(user_context) << "Hexagon: Out of memory (halide_malloc failed for device_malloc)\n";
378 return halide_error_code_out_of_memory;
379 }
380
381 int err = halide_hexagon_dma_device_wrap_native(user_context, buf,
382 reinterpret_cast<uint64_t>(mem));
383 if (err != 0) {
384 halide_free(user_context, mem);
385 return halide_error_code_device_malloc_failed;
386 }
387
388 return halide_error_code_success;
389 }
390
halide_hexagon_dma_device_free(void * user_context,halide_buffer_t * buf)391 WEAK int halide_hexagon_dma_device_free(void *user_context, halide_buffer_t *buf) {
392 debug(user_context)
393 << "Hexagon: halide_hexagon_dma_device_free (user_context: " << user_context
394 << ", buf: " << *buf << ")\n";
395
396 dma_device_handle *dev = (dma_device_handle *)buf->device;
397 void *mem = dev->buffer;
398 halide_hexagon_dma_device_detach_native(user_context, buf);
399
400 halide_free(user_context, mem);
401
402 // This is to match what the default implementation of halide_device_free does.
403 buf->set_device_dirty(false);
404 return halide_error_code_success;
405 }
406
halide_hexagon_dma_allocate_engine(void * user_context,void ** dma_engine)407 WEAK int halide_hexagon_dma_allocate_engine(void *user_context, void **dma_engine) {
408 debug(user_context)
409 << "Hexagon: halide_hexagon_dma_allocate_engine (user_context: " << user_context << ")\n";
410
411 halide_assert(user_context, dma_engine);
412 debug(user_context) << " dma_allocate_dma_engine -> ";
413 *dma_engine = halide_hexagon_allocate_dma_resource(user_context);
414 debug(user_context) << " " << dma_engine << "\n";
415 if (!*dma_engine) {
416 debug(user_context) << "dma_allocate_dma_engine failed.\n";
417 return halide_error_code_generic_error;
418 }
419
420 return halide_error_code_success;
421 }
422
halide_hexagon_dma_deallocate_engine(void * user_context,void * dma_engine)423 WEAK int halide_hexagon_dma_deallocate_engine(void *user_context, void *dma_engine) {
424 debug(user_context)
425 << "Hexagon: halide_hexagon_dma_deallocate_engine (user_context: " << user_context
426 << ", dma_engine: " << dma_engine << ")\n";
427
428 halide_assert(user_context, dma_engine);
429
430 // Its safe to free descriptors here, even on 1st engine of multi-engines deallocation, since its called outside of pipeline
431 // If descriptors are needed on pipeline re-entry, the pool will also re-populate
432 desc_pool_free(user_context);
433
434 // Free DMA Resources
435 int err = halide_hexagon_free_dma_resource(user_context, dma_engine);
436 debug(user_context) << "Hexagon: dma_free_dma_pool done\n";
437 if (err != 0) {
438 debug(user_context) << "Hexagon: Free DMA/Cache Pool failed.\n";
439 return halide_error_code_generic_error;
440 }
441 return halide_error_code_success;
442 }
443
444 namespace {
445
dma_prepare_for_copy(void * user_context,struct halide_buffer_t * buf,void * dma_engine,bool is_ubwc,t_eDmaFmt fmt,bool is_write)446 inline int dma_prepare_for_copy(void *user_context, struct halide_buffer_t *buf, void *dma_engine, bool is_ubwc, t_eDmaFmt fmt, bool is_write) {
447 halide_assert(user_context, dma_engine);
448 dma_device_handle *dev = reinterpret_cast<dma_device_handle *>(buf->device);
449 dev->dma_engine = dma_engine;
450 dev->is_ubwc = is_ubwc;
451 dev->fmt = fmt;
452 dev->is_write = is_write;
453 // To compensate driver's adjustment for UV plane size
454 if ((dev->fmt == eDmaFmt_NV12_UV) ||
455 (dev->fmt == eDmaFmt_P010_UV) ||
456 (dev->fmt == eDmaFmt_TP10_UV) ||
457 (dev->fmt == eDmaFmt_NV124R_UV)) {
458 dev->frame_height = dev->frame_height * 2;
459 }
460
461 return halide_error_code_success;
462 }
463
464 } // namespace
465
halide_hexagon_dma_prepare_for_copy_to_host(void * user_context,struct halide_buffer_t * buf,void * dma_engine,bool is_ubwc,halide_hexagon_image_fmt_t fmt)466 WEAK int halide_hexagon_dma_prepare_for_copy_to_host(void *user_context, struct halide_buffer_t *buf,
467 void *dma_engine, bool is_ubwc, halide_hexagon_image_fmt_t fmt) {
468 debug(user_context)
469 << "Hexagon: halide_hexagon_dma_prepare_for_copy_to_host (user_context: " << user_context
470 << ", buf: " << *buf << ", dma_engine: " << dma_engine << ")\n";
471 t_eDmaFmt format = halide_hexagon_get_dma_format(user_context, fmt);
472 return dma_prepare_for_copy(user_context, buf, dma_engine, is_ubwc, format, 0);
473 }
474
halide_hexagon_dma_prepare_for_copy_to_device(void * user_context,struct halide_buffer_t * buf,void * dma_engine,bool is_ubwc,halide_hexagon_image_fmt_t fmt)475 WEAK int halide_hexagon_dma_prepare_for_copy_to_device(void *user_context, struct halide_buffer_t *buf,
476 void *dma_engine, bool is_ubwc, halide_hexagon_image_fmt_t fmt) {
477 debug(user_context)
478 << "Hexagon: halide_hexagon_dma_prepare_for_copy_to_device (user_context: " << user_context
479 << ", buf: " << *buf << ", dma_engine: " << dma_engine << ")\n";
480 t_eDmaFmt format = halide_hexagon_get_dma_format(user_context, fmt);
481 return dma_prepare_for_copy(user_context, buf, dma_engine, is_ubwc, format, 1);
482 }
483
halide_hexagon_dma_unprepare(void * user_context,struct halide_buffer_t * buf)484 WEAK int halide_hexagon_dma_unprepare(void *user_context, struct halide_buffer_t *buf) {
485 debug(user_context)
486 << "Hexagon: halide_hexagon_dma_unprepare (user_context: " << user_context
487 << ", buf: " << *buf << ")\n";
488 //TODO Now that FinishFrame is called by Hexagon DMA Pool Module, need to check if this function is redundant
489 return halide_error_code_success;
490 }
491
halide_hexagon_dma_buffer_copy(void * user_context,struct halide_buffer_t * src,const struct halide_device_interface_t * dst_device_interface,struct halide_buffer_t * dst)492 WEAK int halide_hexagon_dma_buffer_copy(void *user_context, struct halide_buffer_t *src,
493 const struct halide_device_interface_t *dst_device_interface,
494 struct halide_buffer_t *dst) {
495
496 halide_assert(user_context, dst_device_interface == NULL ||
497 dst_device_interface == &hexagon_dma_device_interface);
498
499 if (src->device_dirty() &&
500 src->device_interface != &hexagon_dma_device_interface) {
501 halide_assert(user_context, dst_device_interface == &hexagon_dma_device_interface);
502 // If the source is not hexagon_dma or host memory, ask the source
503 // device interface to copy to dst host memory first.
504 debug(user_context) << "Hexagon: src->device_interface != &hexagon_dma_device_interface\n";
505 int err = src->device_interface->impl->buffer_copy(user_context, src, NULL, dst);
506 if (err) {
507 error(user_context) << "Hexagon: halide_hexagon_dma_buffer_copy (not DMA) failed: " << err << "\n";
508 return err;
509 }
510 // Now just copy from src to host
511 src = dst;
512 }
513
514 bool from_host = !src->device_dirty() && src->host != NULL;
515 bool to_host = !dst_device_interface;
516
517 halide_assert(user_context, from_host || src->device);
518 halide_assert(user_context, to_host || dst->device);
519
520 halide_assert(user_context, (!from_host && to_host) || (from_host && !to_host));
521
522 debug(user_context)
523 << "Hexagon: halide_hexagon_dma_buffer_copy (user_context: " << user_context
524 << ", src: " << src << ", dst: " << dst << "\n"
525 << ", DMA Read: " << to_host << ", DMA Write: " << from_host << ")\n";
526
527 int nRet;
528 if (dst_device_interface == &hexagon_dma_device_interface) {
529 nRet = halide_hexagon_dma_wrapper(user_context, dst, src);
530 } else {
531 nRet = halide_hexagon_dma_wrapper(user_context, src, dst);
532 }
533
534 return nRet;
535 }
536
halide_hexagon_dma_copy_to_device(void * user_context,halide_buffer_t * buf)537 WEAK int halide_hexagon_dma_copy_to_device(void *user_context, halide_buffer_t *buf) {
538 debug(user_context)
539 << "Hexagon: halide_hexagon_dma_copy_to_device (user_context: " << user_context
540 << ", buf: " << *buf << ")\n";
541
542 error(user_context) << "Hexagon: halide_hexagon_dma_copy_to_device not implemented\n";
543 return halide_error_code_copy_to_device_failed;
544 }
545
halide_hexagon_dma_copy_to_host(void * user_context,struct halide_buffer_t * buf)546 WEAK int halide_hexagon_dma_copy_to_host(void *user_context, struct halide_buffer_t *buf) {
547 debug(user_context)
548 << "Hexagon: halide_hexagon_dma_copy_to_host (user_context: " << user_context
549 << ", buf: " << *buf << ")\n";
550
551 error(user_context) << "Hexagon: halide_hexagon_dma_copy_to_host not implemented\n";
552 return halide_error_code_copy_to_device_failed;
553 }
554
halide_hexagon_dma_device_crop(void * user_context,const struct halide_buffer_t * src,struct halide_buffer_t * dst)555 WEAK int halide_hexagon_dma_device_crop(void *user_context,
556 const struct halide_buffer_t *src,
557 struct halide_buffer_t *dst) {
558 debug(user_context)
559 << "Hexagon: halide_hexagon_dma_device_crop (user_context: " << user_context
560 << " src: " << *src << " dst: " << *dst << ")\n";
561
562 dst->device_interface = src->device_interface;
563
564 const dma_device_handle *src_dev = (dma_device_handle *)src->device;
565 dma_device_handle *dst_dev = malloc_device_handle();
566 halide_assert(user_context, dst_dev);
567 dst_dev->buffer = src_dev->buffer;
568 dst_dev->offset_wrx = src_dev->offset_wrx + dst->dim[0].min - src->dim[0].min;
569 dst_dev->offset_wry = src_dev->offset_wry + dst->dim[1].min - src->dim[1].min;
570 dst_dev->dma_engine = src_dev->dma_engine;
571 dst_dev->frame_width = src_dev->frame_width;
572 dst_dev->frame_height = src_dev->frame_height;
573 dst_dev->frame_stride = src_dev->frame_stride;
574 dst_dev->is_ubwc = src_dev->is_ubwc;
575 dst_dev->is_write = src_dev->is_write;
576 dst_dev->fmt = src_dev->fmt;
577
578 dst->device = reinterpret_cast<uint64_t>(dst_dev);
579
580 return halide_error_code_success;
581 }
582
halide_hexagon_dma_device_slice(void * user_context,const struct halide_buffer_t * src,int slice_dim,int slice_pos,struct halide_buffer_t * dst)583 WEAK int halide_hexagon_dma_device_slice(void *user_context,
584 const struct halide_buffer_t *src,
585 int slice_dim, int slice_pos, struct halide_buffer_t *dst) {
586 debug(user_context)
587 << "Hexagon: halide_hexagon_dma_device_slice (user_context: " << user_context
588 << " src: " << *src << " dst: " << *dst << ")\n";
589
590 halide_assert(user_context, 0);
591
592 error(user_context) << "Hexagon: halide_hexagon_dma_device_slice not implemented\n";
593 return halide_error_code_generic_error;
594 }
595
halide_hexagon_dma_device_release_crop(void * user_context,struct halide_buffer_t * buf)596 WEAK int halide_hexagon_dma_device_release_crop(void *user_context, struct halide_buffer_t *buf) {
597 debug(user_context)
598 << "Hexagon: halide_hexagon_dma_device_release_crop (user_context: " << user_context
599 << " buf: " << *buf << ")\n";
600
601 halide_assert(user_context, buf->device);
602 free((dma_device_handle *)buf->device);
603 buf->device = 0;
604
605 return halide_error_code_success;
606 }
607
halide_hexagon_dma_device_sync(void * user_context,struct halide_buffer_t * buf)608 WEAK int halide_hexagon_dma_device_sync(void *user_context, struct halide_buffer_t *buf) {
609 debug(user_context)
610 << "Hexagon: halide_hexagon_dma_device_sync (user_context: " << user_context
611 << " buf: " << *buf << ")\n";
612 // TODO We need to check if any DMA specific action is required here
613 return halide_error_code_success;
614 }
615
halide_hexagon_dma_device_wrap_native(void * user_context,struct halide_buffer_t * buf,uint64_t handle)616 WEAK int halide_hexagon_dma_device_wrap_native(void *user_context, struct halide_buffer_t *buf,
617 uint64_t handle) {
618 debug(user_context)
619 << "Hexagon: halide_hexagon_dma_device_wrap_native (user_context: " << user_context
620 << " buf: " << *buf << " handle: " << handle << ")\n";
621
622 halide_assert(user_context, buf->device == 0);
623 if (buf->device != 0) {
624 error(user_context) << "Hexagon: halide_hexagon_dma_device_wrap_native buffer already has a device\n";
625 return halide_error_code_device_wrap_native_failed;
626 }
627
628 buf->device_interface = &hexagon_dma_device_interface;
629 buf->device_interface->impl->use_module();
630
631 dma_device_handle *dev = malloc_device_handle();
632 halide_assert(user_context, dev);
633 dev->buffer = reinterpret_cast<uint8_t *>(handle);
634 dev->dma_engine = 0;
635 dev->frame_width = buf->dim[0].extent * buf->dim[0].stride;
636 dev->frame_height = buf->dim[1].extent;
637 dev->frame_stride = buf->dim[1].stride;
638 buf->device = reinterpret_cast<uint64_t>(dev);
639
640 return halide_error_code_success;
641 }
642
halide_hexagon_dma_device_detach_native(void * user_context,struct halide_buffer_t * buf)643 WEAK int halide_hexagon_dma_device_detach_native(void *user_context, struct halide_buffer_t *buf) {
644 debug(user_context)
645 << "Hexagon: halide_hexagon_dma_device_detach_native (user_context: " << user_context
646 << " buf: " << *buf << ")\n";
647
648 if (buf->device == 0) {
649 error(user_context) << "Hexagon: halide_hexagon_dma_device_detach_native buffer without a device\n";
650 return halide_error_code_device_detach_native_failed;
651 }
652 halide_assert(user_context, buf->device_interface == &hexagon_dma_device_interface);
653 dma_device_handle *dev = (dma_device_handle *)buf->device;
654 free(dev);
655 buf->device_interface->impl->release_module();
656 buf->device = 0;
657 buf->device_interface = NULL;
658
659 return halide_error_code_success;
660 }
661
halide_hexagon_dma_device_and_host_malloc(void * user_context,struct halide_buffer_t * buf)662 WEAK int halide_hexagon_dma_device_and_host_malloc(void *user_context, struct halide_buffer_t *buf) {
663 debug(user_context)
664 << "Hexagon: halide_hexagon_dma_device_and_host_malloc (user_context: " << user_context
665 << " buf: " << *buf << ")\n";
666
667 return halide_default_device_and_host_malloc(user_context, buf, &hexagon_dma_device_interface);
668 }
669
halide_hexagon_dma_device_and_host_free(void * user_context,struct halide_buffer_t * buf)670 WEAK int halide_hexagon_dma_device_and_host_free(void *user_context, struct halide_buffer_t *buf) {
671 debug(user_context)
672 << "Hexagon: halide_hexagon_dma_device_and_host_free (user_context: " << user_context
673 << " buf: " << *buf << ")\n";
674
675 return halide_default_device_and_host_free(user_context, buf, &hexagon_dma_device_interface);
676 }
677
// Return the (singleton) Hexagon DMA device interface descriptor.
WEAK const halide_device_interface_t *halide_hexagon_dma_device_interface() {
    return &hexagon_dma_device_interface;
}
681
halide_hexagon_dma_device_release(void * user_context)682 WEAK int halide_hexagon_dma_device_release(void *user_context) {
683 debug(user_context)
684 << "Hexagon: halide_hexagon_dma_device_release (user_context: " << user_context << ")\n";
685
686 return 0;
687 }
688
// Vote for a DMA clock/power level corresponding to the requested Halide
// Hexagon power mode. Returns the result of nDmaWrapper_PowerVoting, or
// halide_error_code_generic_error for an unrecognized mode.
WEAK int halide_hexagon_dma_power_mode_voting(void *user_context, halide_hexagon_power_mode_t cornercase) {
    debug(user_context)
        << "Hexagon: halide_hexagon_dma_power_voting (user_context: " << user_context << ")\n";
    switch (cornercase) {
    case halide_hexagon_power_low_2:
        return nDmaWrapper_PowerVoting(PW_SVS2);
    case halide_hexagon_power_low:
        return nDmaWrapper_PowerVoting(PW_SVS);
    case halide_hexagon_power_low_plus:
        return nDmaWrapper_PowerVoting(PW_SVS_L1);
    case halide_hexagon_power_nominal:
        return nDmaWrapper_PowerVoting(PW_NORMAL);
    case halide_hexagon_power_nominal_plus:
        return nDmaWrapper_PowerVoting(PW_NORMAL_L1);
    case halide_hexagon_power_turbo:
        return nDmaWrapper_PowerVoting(PW_TURBO);
    case halide_hexagon_power_default:
        // NOTE(review): ~PW_SVS presumably tells the driver to release the
        // vote back to its default level — confirm against the UBWCDMA docs.
        return nDmaWrapper_PowerVoting(~PW_SVS);
    default:
        error(user_context) << "Hexagon: halide_hexagon_dma_power_voting power mode (" << cornercase << ") not found\n";
        return halide_error_code_generic_error;
    }
}
712
713 } // extern "C" linkage
714
715 namespace Halide {
716 namespace Runtime {
717 namespace Internal {
718 namespace HexagonDma {
719
// Function-pointer table backing the Hexagon DMA device interface.
// Entry order must match halide_device_interface_impl_t exactly.
WEAK halide_device_interface_impl_t hexagon_dma_device_interface_impl = {
    halide_use_jit_module,
    halide_release_jit_module,
    halide_hexagon_dma_device_malloc,
    halide_hexagon_dma_device_free,
    halide_hexagon_dma_device_sync,
    halide_hexagon_dma_device_release,
    halide_hexagon_dma_copy_to_host,
    halide_hexagon_dma_copy_to_device,
    halide_hexagon_dma_device_and_host_malloc,
    halide_hexagon_dma_device_and_host_free,
    halide_hexagon_dma_buffer_copy,
    halide_hexagon_dma_device_crop,
    halide_hexagon_dma_device_slice,
    halide_hexagon_dma_device_release_crop,
    halide_hexagon_dma_device_wrap_native,
    halide_hexagon_dma_device_detach_native,
};
738
// Public device interface: routes through the generic halide_device_* entry
// points, which dispatch into the impl table above.
// NOTE(review): the NULL slot appears to be an optional hook left unset
// (e.g. compute capability) — confirm against halide_device_interface_t in
// HalideRuntime.h.
WEAK halide_device_interface_t hexagon_dma_device_interface = {
    halide_device_malloc,
    halide_device_free,
    halide_device_sync,
    halide_device_release,
    halide_copy_to_host,
    halide_copy_to_device,
    halide_device_and_host_malloc,
    halide_device_and_host_free,
    halide_buffer_copy,
    halide_device_crop,
    halide_device_slice,
    halide_device_release_crop,
    halide_device_wrap_native,
    halide_device_detach_native,
    NULL,
    &hexagon_dma_device_interface_impl};
756
757 } // namespace HexagonDma
758 } // namespace Internal
759 } // namespace Runtime
760 } // namespace Halide
761