// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2021 MediaTek Inc.
 * Author: Yunfei Dong <yunfei.dong@mediatek.com>
 */

#include <linux/freezer.h>
#include <linux/interrupt.h>
#include <linux/kthread.h>

#include "mtk_vcodec_dec_drv.h"
#include "mtk_vcodec_dec_pm.h"
#include "vdec_msg_queue.h"

#define VDEC_MSG_QUEUE_TIMEOUT_MS 1500

/* the size used to store the LAT slice header information */
#define VDEC_LAT_SLICE_HEADER_SZ (640 * SZ_1K)

/* the size used to store the AVC error information */
#define VDEC_ERR_MAP_SZ_AVC (17 * SZ_1K)

/* AV1 only: the motion-vector read buffer and the per-tile (64 bytes each) buffer */
#define VDEC_RD_MV_BUFFER_SZ (((SZ_4K * 2304 >> 4) + SZ_1K) << 1)
#define VDEC_LAT_TILE_SZ (64 * V4L2_AV1_MAX_TILE_COUNT)

/*
 * The core hardware reads the trans buffer written by the LAT hardware and
 * decodes it again. The trans buffer sizes for FHD and 4K bitstreams differ.
 */
static int vdec_msg_queue_get_trans_size(int width, int height)
{
        if (width > 1920 || height > 1088)
                return 30 * SZ_1M;
        else
                return 6 * SZ_1M;
}

void vdec_msg_queue_init_ctx(struct vdec_msg_queue_ctx *ctx, int hardware_index)
{
        init_waitqueue_head(&ctx->ready_to_use);
        INIT_LIST_HEAD(&ctx->ready_queue);
        spin_lock_init(&ctx->ready_lock);
        ctx->ready_num = 0;
        ctx->hardware_index = hardware_index;
}

static struct list_head *vdec_get_buf_list(int hardware_index, struct vdec_lat_buf *buf)
{
        switch (hardware_index) {
        case MTK_VDEC_CORE:
                return &buf->core_list;
        case MTK_VDEC_LAT0:
                return &buf->lat_list;
        default:
                return NULL;
        }
}

static void vdec_msg_queue_inc(struct vdec_msg_queue *msg_queue, int hardware_index)
{
        if (hardware_index == MTK_VDEC_CORE)
                atomic_inc(&msg_queue->core_list_cnt);
        else
                atomic_inc(&msg_queue->lat_list_cnt);
}

static void vdec_msg_queue_dec(struct vdec_msg_queue *msg_queue, int hardware_index)
{
        if (hardware_index == MTK_VDEC_CORE)
                atomic_dec(&msg_queue->core_list_cnt);
        else
                atomic_dec(&msg_queue->lat_list_cnt);
}

int vdec_msg_queue_qbuf(struct vdec_msg_queue_ctx *msg_ctx, struct vdec_lat_buf *buf)
{
        struct list_head *head;

        head = vdec_get_buf_list(msg_ctx->hardware_index, buf);
        if (!head) {
                mtk_v4l2_vdec_err(buf->ctx, "fail to qbuf: %d", msg_ctx->hardware_index);
                return -EINVAL;
        }

        spin_lock(&msg_ctx->ready_lock);
        list_add_tail(head, &msg_ctx->ready_queue);
        msg_ctx->ready_num++;

        vdec_msg_queue_inc(&buf->ctx->msg_queue, msg_ctx->hardware_index);
        if (msg_ctx->hardware_index != MTK_VDEC_CORE) {
                wake_up_all(&msg_ctx->ready_to_use);
        } else {
                if (!(buf->ctx->msg_queue.status & CONTEXT_LIST_QUEUED)) {
                        queue_work(buf->ctx->dev->core_workqueue, &buf->ctx->msg_queue.core_work);
                        buf->ctx->msg_queue.status |= CONTEXT_LIST_QUEUED;
                }
        }

        mtk_v4l2_vdec_dbg(3, buf->ctx, "enqueue buf type: %d addr: 0x%p num: %d",
                          msg_ctx->hardware_index, buf, msg_ctx->ready_num);
        spin_unlock(&msg_ctx->ready_lock);

        return 0;
}

static bool vdec_msg_queue_wait_event(struct vdec_msg_queue_ctx *msg_ctx)
{
        int ret;

        ret = wait_event_timeout(msg_ctx->ready_to_use,
                                 !list_empty(&msg_ctx->ready_queue),
                                 msecs_to_jiffies(VDEC_MSG_QUEUE_TIMEOUT_MS));
        if (!ret)
                return false;

        return true;
}

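/*
 * Dequeue semantics differ per hardware queue: the core queue returns NULL
 * immediately when empty, because vdec_msg_queue_qbuf() schedules the core
 * work again whenever a new core buffer arrives. The LAT queue instead waits
 * up to VDEC_MSG_QUEUE_TIMEOUT_MS for a buffer to be queued back, and
 * returns NULL only on timeout.
 */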
struct vdec_lat_buf *vdec_msg_queue_dqbuf(struct vdec_msg_queue_ctx *msg_ctx)
{
        struct vdec_lat_buf *buf;
        struct list_head *head;
        int ret;

        spin_lock(&msg_ctx->ready_lock);
        if (list_empty(&msg_ctx->ready_queue)) {
                spin_unlock(&msg_ctx->ready_lock);

                if (msg_ctx->hardware_index == MTK_VDEC_CORE)
                        return NULL;

                ret = vdec_msg_queue_wait_event(msg_ctx);
                if (!ret)
                        return NULL;
                spin_lock(&msg_ctx->ready_lock);
        }

        if (msg_ctx->hardware_index == MTK_VDEC_CORE)
                buf = list_first_entry(&msg_ctx->ready_queue,
                                       struct vdec_lat_buf, core_list);
        else
                buf = list_first_entry(&msg_ctx->ready_queue,
                                       struct vdec_lat_buf, lat_list);

        head = vdec_get_buf_list(msg_ctx->hardware_index, buf);
        if (!head) {
                spin_unlock(&msg_ctx->ready_lock);
                mtk_v4l2_vdec_err(buf->ctx, "fail to dqbuf: %d", msg_ctx->hardware_index);
                return NULL;
        }
        list_del(head);
        vdec_msg_queue_dec(&buf->ctx->msg_queue, msg_ctx->hardware_index);

        msg_ctx->ready_num--;
        mtk_v4l2_vdec_dbg(3, buf->ctx, "dqueue buf type:%d addr: 0x%p num: %d",
                          msg_ctx->hardware_index, buf, msg_ctx->ready_num);
        spin_unlock(&msg_ctx->ready_lock);

        return buf;
}

void vdec_msg_queue_update_ube_rptr(struct vdec_msg_queue *msg_queue, uint64_t ube_rptr)
{
        spin_lock(&msg_queue->lat_ctx.ready_lock);
        msg_queue->wdma_rptr_addr = ube_rptr;
        mtk_v4l2_vdec_dbg(3, msg_queue->ctx, "update ube rptr (0x%llx)", ube_rptr);
        spin_unlock(&msg_queue->lat_ctx.ready_lock);
}

void vdec_msg_queue_update_ube_wptr(struct vdec_msg_queue *msg_queue, uint64_t ube_wptr)
{
        spin_lock(&msg_queue->lat_ctx.ready_lock);
        msg_queue->wdma_wptr_addr = ube_wptr;
        mtk_v4l2_vdec_dbg(3, msg_queue->ctx, "update ube wptr: (0x%llx 0x%llx) offset: 0x%llx",
                          msg_queue->wdma_rptr_addr, msg_queue->wdma_wptr_addr,
                          ube_wptr);
        spin_unlock(&msg_queue->lat_ctx.ready_lock);
}

bool vdec_msg_queue_wait_lat_buf_full(struct vdec_msg_queue *msg_queue)
{
        if (atomic_read(&msg_queue->lat_list_cnt) == NUM_BUFFER_COUNT) {
                mtk_v4l2_vdec_dbg(3, msg_queue->ctx, "wait buf full: (%d %d) ready:%d status:%d",
                                  atomic_read(&msg_queue->lat_list_cnt),
                                  atomic_read(&msg_queue->core_list_cnt),
                                  msg_queue->lat_ctx.ready_num, msg_queue->status);
                return true;
        }

        msg_queue->flush_done = false;
        vdec_msg_queue_qbuf(&msg_queue->core_ctx, &msg_queue->empty_lat_buf);
        wait_event(msg_queue->core_dec_done, msg_queue->flush_done);

        mtk_v4l2_vdec_dbg(3, msg_queue->ctx, "flush done => ready_num:%d status:%d list(%d %d)",
                          msg_queue->lat_ctx.ready_num, msg_queue->status,
                          atomic_read(&msg_queue->lat_list_cnt),
                          atomic_read(&msg_queue->core_list_cnt));

        return false;
}

void vdec_msg_queue_deinit(struct vdec_msg_queue *msg_queue,
                           struct mtk_vcodec_dec_ctx *ctx)
{
        struct vdec_lat_buf *lat_buf;
        struct mtk_vcodec_mem *mem;
        int i;

        mem = &msg_queue->wdma_addr;
        if (mem->va)
                mtk_vcodec_mem_free(ctx, mem);
        for (i = 0; i < NUM_BUFFER_COUNT; i++) {
                lat_buf = &msg_queue->lat_buf[i];

                mem = &lat_buf->wdma_err_addr;
                if (mem->va)
                        mtk_vcodec_mem_free(ctx, mem);

                mem = &lat_buf->slice_bc_addr;
                if (mem->va)
                        mtk_vcodec_mem_free(ctx, mem);

                mem = &lat_buf->rd_mv_addr;
                if (mem->va)
                        mtk_vcodec_mem_free(ctx, mem);

                mem = &lat_buf->tile_addr;
                if (mem->va)
                        mtk_vcodec_mem_free(ctx, mem);

                kfree(lat_buf->private_data);
                lat_buf->private_data = NULL;
        }

        if (msg_queue->wdma_addr.size)
                cancel_work_sync(&msg_queue->core_work);
}

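/*
 * Core work handler: clear CONTEXT_LIST_QUEUED so that vdec_msg_queue_qbuf()
 * can schedule this work again, then dequeue one buffer from the core queue.
 * The special empty_lat_buf marker (is_last_frame == true) signals a flush:
 * set CONTEXT_LIST_DEC_DONE and wake up the waiter in
 * vdec_msg_queue_wait_lat_buf_full(). Any other buffer is decoded by the
 * codec's core_decode callback with the core hardware powered on, then
 * returned to the LAT queue. If more core buffers are pending, the work
 * requeues itself.
 */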
static void vdec_msg_queue_core_work(struct work_struct *work)
{
        struct vdec_msg_queue *msg_queue =
                container_of(work, struct vdec_msg_queue, core_work);
        struct mtk_vcodec_dec_ctx *ctx =
                container_of(msg_queue, struct mtk_vcodec_dec_ctx, msg_queue);
        struct mtk_vcodec_dec_dev *dev = ctx->dev;
        struct vdec_lat_buf *lat_buf;

        spin_lock(&msg_queue->core_ctx.ready_lock);
        ctx->msg_queue.status &= ~CONTEXT_LIST_QUEUED;
        spin_unlock(&msg_queue->core_ctx.ready_lock);

        lat_buf = vdec_msg_queue_dqbuf(&msg_queue->core_ctx);
        if (!lat_buf)
                return;

        if (lat_buf->is_last_frame) {
                ctx->msg_queue.status = CONTEXT_LIST_DEC_DONE;
                msg_queue->flush_done = true;
                wake_up(&ctx->msg_queue.core_dec_done);

                return;
        }

        ctx = lat_buf->ctx;
        mtk_vcodec_dec_enable_hardware(ctx, MTK_VDEC_CORE);
        mtk_vcodec_set_curr_ctx(dev, ctx, MTK_VDEC_CORE);

        lat_buf->core_decode(lat_buf);

        mtk_vcodec_set_curr_ctx(dev, NULL, MTK_VDEC_CORE);
        mtk_vcodec_dec_disable_hardware(ctx, MTK_VDEC_CORE);
        vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);

        if (!(ctx->msg_queue.status & CONTEXT_LIST_QUEUED) &&
            atomic_read(&msg_queue->core_list_cnt)) {
                spin_lock(&msg_queue->core_ctx.ready_lock);
                ctx->msg_queue.status |= CONTEXT_LIST_QUEUED;
                spin_unlock(&msg_queue->core_ctx.ready_lock);
                queue_work(ctx->dev->core_workqueue, &msg_queue->core_work);
        }
}

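/*
 * Allocate the per-instance message queue resources: the shared trans (UBE)
 * buffer sized from the picture info, and, for each of the NUM_BUFFER_COUNT
 * lat_buf entries, the error-map and slice-header buffers plus, for AV1 only,
 * the motion-vector and tile buffers. The expected call order (an assumption
 * from how this file fits together, not a documented contract) is:
 * vdec_msg_queue_init() once the picture info is known, lat_buf dqbuf/qbuf
 * around LAT and core decoding, vdec_msg_queue_wait_lat_buf_full() when
 * flushing, and vdec_msg_queue_deinit() on release.
 */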
int vdec_msg_queue_init(struct vdec_msg_queue *msg_queue,
                        struct mtk_vcodec_dec_ctx *ctx, core_decode_cb_t core_decode,
                        int private_size)
{
        struct vdec_lat_buf *lat_buf;
        int i, err;

        /* the msg queue is already initialized */
        if (msg_queue->wdma_addr.size)
                return 0;

        vdec_msg_queue_init_ctx(&msg_queue->lat_ctx, MTK_VDEC_LAT0);
        vdec_msg_queue_init_ctx(&msg_queue->core_ctx, MTK_VDEC_CORE);
        INIT_WORK(&msg_queue->core_work, vdec_msg_queue_core_work);

        atomic_set(&msg_queue->lat_list_cnt, 0);
        atomic_set(&msg_queue->core_list_cnt, 0);
        init_waitqueue_head(&msg_queue->core_dec_done);
        msg_queue->status = CONTEXT_LIST_EMPTY;

        msg_queue->wdma_addr.size =
                vdec_msg_queue_get_trans_size(ctx->picinfo.buf_w,
                                              ctx->picinfo.buf_h);
        err = mtk_vcodec_mem_alloc(ctx, &msg_queue->wdma_addr);
        if (err) {
                mtk_v4l2_vdec_err(ctx, "failed to allocate wdma_addr buf");
                msg_queue->wdma_addr.size = 0;
                return -ENOMEM;
        }
        msg_queue->wdma_rptr_addr = msg_queue->wdma_addr.dma_addr;
        msg_queue->wdma_wptr_addr = msg_queue->wdma_addr.dma_addr;

        msg_queue->empty_lat_buf.ctx = ctx;
        msg_queue->empty_lat_buf.core_decode = NULL;
        msg_queue->empty_lat_buf.is_last_frame = true;

        msg_queue->ctx = ctx;
        for (i = 0; i < NUM_BUFFER_COUNT; i++) {
                lat_buf = &msg_queue->lat_buf[i];

                lat_buf->wdma_err_addr.size = VDEC_ERR_MAP_SZ_AVC;
                err = mtk_vcodec_mem_alloc(ctx, &lat_buf->wdma_err_addr);
                if (err) {
                        mtk_v4l2_vdec_err(ctx, "failed to allocate wdma_err_addr buf[%d]", i);
                        goto mem_alloc_err;
                }

                lat_buf->slice_bc_addr.size = VDEC_LAT_SLICE_HEADER_SZ;
                err = mtk_vcodec_mem_alloc(ctx, &lat_buf->slice_bc_addr);
                if (err) {
                        mtk_v4l2_vdec_err(ctx, "failed to allocate slice_bc_addr buf[%d]", i);
                        goto mem_alloc_err;
                }

                if (ctx->current_codec == V4L2_PIX_FMT_AV1_FRAME) {
                        lat_buf->rd_mv_addr.size = VDEC_RD_MV_BUFFER_SZ;
                        err = mtk_vcodec_mem_alloc(ctx, &lat_buf->rd_mv_addr);
                        if (err) {
                                mtk_v4l2_vdec_err(ctx, "failed to allocate rd_mv_addr buf[%d]", i);
                                goto mem_alloc_err;
                        }

                        lat_buf->tile_addr.size = VDEC_LAT_TILE_SZ;
                        err = mtk_vcodec_mem_alloc(ctx, &lat_buf->tile_addr);
                        if (err) {
                                mtk_v4l2_vdec_err(ctx, "failed to allocate tile_addr buf[%d]", i);
                                goto mem_alloc_err;
                        }
                }

                lat_buf->private_data = kzalloc(private_size, GFP_KERNEL);
                if (!lat_buf->private_data) {
                        err = -ENOMEM;
                        goto mem_alloc_err;
                }

                lat_buf->ctx = ctx;
                lat_buf->core_decode = core_decode;
                lat_buf->is_last_frame = false;
                err = vdec_msg_queue_qbuf(&msg_queue->lat_ctx, lat_buf);
                if (err) {
                        mtk_v4l2_vdec_err(ctx, "failed to qbuf buf[%d]", i);
                        goto mem_alloc_err;
                }
        }
        return 0;

mem_alloc_err:
        vdec_msg_queue_deinit(msg_queue, ctx);
        return err;
}