1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Cedrus VPU driver
4  *
5  * Copyright (C) 2013 Jens Kuske <jenskuske@gmail.com>
6  * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
7  * Copyright (C) 2018 Bootlin
8  */
9 
10 #include <linux/delay.h>
11 #include <linux/types.h>
12 
13 #include <media/videobuf2-dma-contig.h>
14 
15 #include "cedrus.h"
16 #include "cedrus_hw.h"
17 #include "cedrus_regs.h"
18 
/*
 * These are the sizes for side buffers required by the hardware for storing
 * internal decoding metadata. They match the values used by the early BSP
 * implementations, which were initially exposed in libvdpau-sunxi.
 * Subsequent BSP implementations seem to double the neighbor info buffer size
 * for the H6 SoC, which may be related to 10 bit H265 support.
 *
 * - CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE: size in bytes of the neighbor info
 *   buffer allocated in cedrus_h265_start() and freed in cedrus_h265_stop().
 * - CEDRUS_H265_ENTRY_POINTS_BUF_SIZE: size in bytes of the entry points
 *   buffer (NOTE(review): not referenced by the code visible in this file).
 * - CEDRUS_H265_MV_COL_BUF_UNIT_CTB_SIZE: number of bytes accounted per CTB
 *   when cedrus_h265_setup() sizes the per-buffer MV column unit.
 */
#define CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE	(794 * SZ_1K)
#define CEDRUS_H265_ENTRY_POINTS_BUF_SIZE	(4 * SZ_1K)
#define CEDRUS_H265_MV_COL_BUF_UNIT_CTB_SIZE	160
29 
/*
 * Frame info record as streamed to the decoder SRAM by
 * cedrus_h265_frame_info_write_single(). It consists of six little-endian
 * 32-bit words and is packed because it is copied to the hardware word by
 * word, in declaration order.
 */
struct cedrus_h265_sram_frame_info {
	/* Picture order count of the top field (or of the whole frame). */
	__le32	top_pic_order_cnt;
	/* Picture order count of the bottom field; same as top for frames. */
	__le32	bottom_pic_order_cnt;
	/* MV column buffer address for the top field, in hardware encoding. */
	__le32	top_mv_col_buf_addr;
	/* MV column buffer address for the bottom field; same as top for frames. */
	__le32	bottom_mv_col_buf_addr;
	/* Destination luma plane address, in hardware encoding. */
	__le32	luma_addr;
	/* Destination chroma plane address, in hardware encoding. */
	__le32	chroma_addr;
} __packed;
38 
/*
 * One prediction weight entry as streamed to the decoder SRAM by
 * cedrus_h265_pred_weight_write(). Two of these entries (4 bytes in total)
 * make up one SRAM word.
 */
struct cedrus_h265_sram_pred_weight {
	/* Signed weight delta, copied from the slice prediction weight table. */
	__s8	delta_weight;
	/* Signed offset, copied from the slice prediction weight table. */
	__s8	offset;
} __packed;
43 
44 static enum cedrus_irq_status cedrus_h265_irq_status(struct cedrus_ctx *ctx)
45 {
46 	struct cedrus_dev *dev = ctx->dev;
47 	u32 reg;
48 
49 	reg = cedrus_read(dev, VE_DEC_H265_STATUS);
50 	reg &= VE_DEC_H265_STATUS_CHECK_MASK;
51 
52 	if (reg & VE_DEC_H265_STATUS_CHECK_ERROR ||
53 	    !(reg & VE_DEC_H265_STATUS_SUCCESS))
54 		return CEDRUS_IRQ_ERROR;
55 
56 	return CEDRUS_IRQ_OK;
57 }
58 
59 static void cedrus_h265_irq_clear(struct cedrus_ctx *ctx)
60 {
61 	struct cedrus_dev *dev = ctx->dev;
62 
63 	cedrus_write(dev, VE_DEC_H265_STATUS, VE_DEC_H265_STATUS_CHECK_MASK);
64 }
65 
66 static void cedrus_h265_irq_disable(struct cedrus_ctx *ctx)
67 {
68 	struct cedrus_dev *dev = ctx->dev;
69 	u32 reg = cedrus_read(dev, VE_DEC_H265_CTRL);
70 
71 	reg &= ~VE_DEC_H265_CTRL_IRQ_MASK;
72 
73 	cedrus_write(dev, VE_DEC_H265_CTRL, reg);
74 }
75 
/*
 * Select the SRAM offset that subsequent cedrus_h265_sram_write_data() (or
 * direct VE_DEC_H265_SRAM_DATA) writes will target.
 */
static void cedrus_h265_sram_write_offset(struct cedrus_dev *dev, u32 offset)
{
	cedrus_write(dev, VE_DEC_H265_SRAM_OFFSET, offset);
}
80 
81 static void cedrus_h265_sram_write_data(struct cedrus_dev *dev, void *data,
82 					unsigned int size)
83 {
84 	u32 *word = data;
85 
86 	while (size >= sizeof(u32)) {
87 		cedrus_write(dev, VE_DEC_H265_SRAM_DATA, *word++);
88 		size -= sizeof(u32);
89 	}
90 }
91 
92 static inline dma_addr_t
93 cedrus_h265_frame_info_mv_col_buf_addr(struct cedrus_ctx *ctx,
94 				       unsigned int index, unsigned int field)
95 {
96 	return ctx->codec.h265.mv_col_buf_addr + index *
97 	       ctx->codec.h265.mv_col_buf_unit_size +
98 	       field * ctx->codec.h265.mv_col_buf_unit_size / 2;
99 }
100 
101 static void cedrus_h265_frame_info_write_single(struct cedrus_ctx *ctx,
102 						unsigned int index,
103 						bool field_pic,
104 						u32 pic_order_cnt[],
105 						int buffer_index)
106 {
107 	struct cedrus_dev *dev = ctx->dev;
108 	dma_addr_t dst_luma_addr = cedrus_dst_buf_addr(ctx, buffer_index, 0);
109 	dma_addr_t dst_chroma_addr = cedrus_dst_buf_addr(ctx, buffer_index, 1);
110 	dma_addr_t mv_col_buf_addr[2] = {
111 		cedrus_h265_frame_info_mv_col_buf_addr(ctx, buffer_index, 0),
112 		cedrus_h265_frame_info_mv_col_buf_addr(ctx, buffer_index,
113 						       field_pic ? 1 : 0)
114 	};
115 	u32 offset = VE_DEC_H265_SRAM_OFFSET_FRAME_INFO +
116 		     VE_DEC_H265_SRAM_OFFSET_FRAME_INFO_UNIT * index;
117 	struct cedrus_h265_sram_frame_info frame_info = {
118 		.top_pic_order_cnt = cpu_to_le32(pic_order_cnt[0]),
119 		.bottom_pic_order_cnt = cpu_to_le32(field_pic ?
120 						    pic_order_cnt[1] :
121 						    pic_order_cnt[0]),
122 		.top_mv_col_buf_addr =
123 			cpu_to_le32(VE_DEC_H265_SRAM_DATA_ADDR_BASE(mv_col_buf_addr[0])),
124 		.bottom_mv_col_buf_addr = cpu_to_le32(field_pic ?
125 			VE_DEC_H265_SRAM_DATA_ADDR_BASE(mv_col_buf_addr[1]) :
126 			VE_DEC_H265_SRAM_DATA_ADDR_BASE(mv_col_buf_addr[0])),
127 		.luma_addr = cpu_to_le32(VE_DEC_H265_SRAM_DATA_ADDR_BASE(dst_luma_addr)),
128 		.chroma_addr = cpu_to_le32(VE_DEC_H265_SRAM_DATA_ADDR_BASE(dst_chroma_addr)),
129 	};
130 
131 	cedrus_h265_sram_write_offset(dev, offset);
132 	cedrus_h265_sram_write_data(dev, &frame_info, sizeof(frame_info));
133 }
134 
135 static void cedrus_h265_frame_info_write_dpb(struct cedrus_ctx *ctx,
136 					     const struct v4l2_hevc_dpb_entry *dpb,
137 					     u8 num_active_dpb_entries)
138 {
139 	struct vb2_queue *vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx,
140 					       V4L2_BUF_TYPE_VIDEO_CAPTURE);
141 	unsigned int i;
142 
143 	for (i = 0; i < num_active_dpb_entries; i++) {
144 		int buffer_index = vb2_find_timestamp(vq, dpb[i].timestamp, 0);
145 		u32 pic_order_cnt[2] = {
146 			dpb[i].pic_order_cnt[0],
147 			dpb[i].pic_order_cnt[1]
148 		};
149 
150 		cedrus_h265_frame_info_write_single(ctx, i, dpb[i].field_pic,
151 						    pic_order_cnt,
152 						    buffer_index);
153 	}
154 }
155 
156 static void cedrus_h265_ref_pic_list_write(struct cedrus_dev *dev,
157 					   const struct v4l2_hevc_dpb_entry *dpb,
158 					   const u8 list[],
159 					   u8 num_ref_idx_active,
160 					   u32 sram_offset)
161 {
162 	unsigned int i;
163 	u32 word = 0;
164 
165 	cedrus_h265_sram_write_offset(dev, sram_offset);
166 
167 	for (i = 0; i < num_ref_idx_active; i++) {
168 		unsigned int shift = (i % 4) * 8;
169 		unsigned int index = list[i];
170 		u8 value = list[i];
171 
172 		if (dpb[index].flags & V4L2_HEVC_DPB_ENTRY_LONG_TERM_REFERENCE)
173 			value |= VE_DEC_H265_SRAM_REF_PIC_LIST_LT_REF;
174 
175 		/* Each SRAM word gathers up to 4 references. */
176 		word |= value << shift;
177 
178 		/* Write the word to SRAM and clear it for the next batch. */
179 		if ((i % 4) == 3 || i == (num_ref_idx_active - 1)) {
180 			cedrus_h265_sram_write_data(dev, &word, sizeof(word));
181 			word = 0;
182 		}
183 	}
184 }
185 
186 static void cedrus_h265_pred_weight_write(struct cedrus_dev *dev,
187 					  const s8 delta_luma_weight[],
188 					  const s8 luma_offset[],
189 					  const s8 delta_chroma_weight[][2],
190 					  const s8 chroma_offset[][2],
191 					  u8 num_ref_idx_active,
192 					  u32 sram_luma_offset,
193 					  u32 sram_chroma_offset)
194 {
195 	struct cedrus_h265_sram_pred_weight pred_weight[2] = { { 0 } };
196 	unsigned int i, j;
197 
198 	cedrus_h265_sram_write_offset(dev, sram_luma_offset);
199 
200 	for (i = 0; i < num_ref_idx_active; i++) {
201 		unsigned int index = i % 2;
202 
203 		pred_weight[index].delta_weight = delta_luma_weight[i];
204 		pred_weight[index].offset = luma_offset[i];
205 
206 		if (index == 1 || i == (num_ref_idx_active - 1))
207 			cedrus_h265_sram_write_data(dev, (u32 *)&pred_weight,
208 						    sizeof(pred_weight));
209 	}
210 
211 	cedrus_h265_sram_write_offset(dev, sram_chroma_offset);
212 
213 	for (i = 0; i < num_ref_idx_active; i++) {
214 		for (j = 0; j < 2; j++) {
215 			pred_weight[j].delta_weight = delta_chroma_weight[i][j];
216 			pred_weight[j].offset = chroma_offset[i][j];
217 		}
218 
219 		cedrus_h265_sram_write_data(dev, &pred_weight,
220 					    sizeof(pred_weight));
221 	}
222 }
223 
224 static void cedrus_h265_skip_bits(struct cedrus_dev *dev, int num)
225 {
226 	int count = 0;
227 
228 	while (count < num) {
229 		int tmp = min(num - count, 32);
230 
231 		cedrus_write(dev, VE_DEC_H265_TRIGGER,
232 			     VE_DEC_H265_TRIGGER_FLUSH_BITS |
233 			     VE_DEC_H265_TRIGGER_TYPE_N_BITS(tmp));
234 		while (cedrus_read(dev, VE_DEC_H265_STATUS) & VE_DEC_H265_STATUS_VLD_BUSY)
235 			udelay(1);
236 
237 		count += tmp;
238 	}
239 }
240 
241 static void cedrus_h265_write_scaling_list(struct cedrus_ctx *ctx,
242 					   struct cedrus_run *run)
243 {
244 	const struct v4l2_ctrl_hevc_scaling_matrix *scaling;
245 	struct cedrus_dev *dev = ctx->dev;
246 	u32 i, j, k, val;
247 
248 	scaling = run->h265.scaling_matrix;
249 
250 	cedrus_write(dev, VE_DEC_H265_SCALING_LIST_DC_COEF0,
251 		     (scaling->scaling_list_dc_coef_32x32[1] << 24) |
252 		     (scaling->scaling_list_dc_coef_32x32[0] << 16) |
253 		     (scaling->scaling_list_dc_coef_16x16[1] << 8) |
254 		     (scaling->scaling_list_dc_coef_16x16[0] << 0));
255 
256 	cedrus_write(dev, VE_DEC_H265_SCALING_LIST_DC_COEF1,
257 		     (scaling->scaling_list_dc_coef_16x16[5] << 24) |
258 		     (scaling->scaling_list_dc_coef_16x16[4] << 16) |
259 		     (scaling->scaling_list_dc_coef_16x16[3] << 8) |
260 		     (scaling->scaling_list_dc_coef_16x16[2] << 0));
261 
262 	cedrus_h265_sram_write_offset(dev, VE_DEC_H265_SRAM_OFFSET_SCALING_LISTS);
263 
264 	for (i = 0; i < 6; i++)
265 		for (j = 0; j < 8; j++)
266 			for (k = 0; k < 8; k += 4) {
267 				val = ((u32)scaling->scaling_list_8x8[i][j + (k + 3) * 8] << 24) |
268 				      ((u32)scaling->scaling_list_8x8[i][j + (k + 2) * 8] << 16) |
269 				      ((u32)scaling->scaling_list_8x8[i][j + (k + 1) * 8] << 8) |
270 				      scaling->scaling_list_8x8[i][j + k * 8];
271 				cedrus_write(dev, VE_DEC_H265_SRAM_DATA, val);
272 			}
273 
274 	for (i = 0; i < 2; i++)
275 		for (j = 0; j < 8; j++)
276 			for (k = 0; k < 8; k += 4) {
277 				val = ((u32)scaling->scaling_list_32x32[i][j + (k + 3) * 8] << 24) |
278 				      ((u32)scaling->scaling_list_32x32[i][j + (k + 2) * 8] << 16) |
279 				      ((u32)scaling->scaling_list_32x32[i][j + (k + 1) * 8] << 8) |
280 				      scaling->scaling_list_32x32[i][j + k * 8];
281 				cedrus_write(dev, VE_DEC_H265_SRAM_DATA, val);
282 			}
283 
284 	for (i = 0; i < 6; i++)
285 		for (j = 0; j < 8; j++)
286 			for (k = 0; k < 8; k += 4) {
287 				val = ((u32)scaling->scaling_list_16x16[i][j + (k + 3) * 8] << 24) |
288 				      ((u32)scaling->scaling_list_16x16[i][j + (k + 2) * 8] << 16) |
289 				      ((u32)scaling->scaling_list_16x16[i][j + (k + 1) * 8] << 8) |
290 				      scaling->scaling_list_16x16[i][j + k * 8];
291 				cedrus_write(dev, VE_DEC_H265_SRAM_DATA, val);
292 			}
293 
294 	for (i = 0; i < 6; i++)
295 		for (j = 0; j < 4; j++) {
296 			val = ((u32)scaling->scaling_list_4x4[i][j + 12] << 24) |
297 			      ((u32)scaling->scaling_list_4x4[i][j + 8] << 16) |
298 			      ((u32)scaling->scaling_list_4x4[i][j + 4] << 8) |
299 			      scaling->scaling_list_4x4[i][j];
300 			cedrus_write(dev, VE_DEC_H265_SRAM_DATA, val);
301 		}
302 }
303 
/*
 * Program the H.265 engine for decoding one slice.
 *
 * In order, this:
 *   - allocates the MV column buffer on first use (its size depends on the
 *     CTB size, which is only known from the SPS);
 *   - enables the H.265 engine;
 *   - describes the source bitstream (length, start/end addresses) and skips
 *     the slice header bits;
 *   - copies NAL header, SPS, PPS and slice header fields to their registers;
 *   - writes the scaling lists, the neighbor info address, the DPB frame
 *     info entries, the output frame entry and the reference picture lists
 *     (with prediction weights when enabled);
 *   - enables the engine interrupts.
 *
 * Decoding itself is not started here; see cedrus_h265_trigger().
 *
 * The function returns void: if the MV column buffer allocation fails it
 * returns early, before the engine is enabled and before any register is
 * programmed, and the failure is not reported to the caller.
 */
static void cedrus_h265_setup(struct cedrus_ctx *ctx,
			      struct cedrus_run *run)
{
	struct cedrus_dev *dev = ctx->dev;
	const struct v4l2_ctrl_hevc_sps *sps;
	const struct v4l2_ctrl_hevc_pps *pps;
	const struct v4l2_ctrl_hevc_slice_params *slice_params;
	const struct v4l2_ctrl_hevc_decode_params *decode_params;
	const struct v4l2_hevc_pred_weight_table *pred_weight_table;
	unsigned int width_in_ctb_luma, ctb_size_luma;
	unsigned int log2_max_luma_coding_block_size;
	dma_addr_t src_buf_addr;
	dma_addr_t src_buf_end_addr;
	u32 chroma_log2_weight_denom;
	u32 output_pic_list_index;
	u32 pic_order_cnt[2];
	u32 reg;

	sps = run->h265.sps;
	pps = run->h265.pps;
	slice_params = run->h265.slice_params;
	decode_params = run->h265.decode_params;
	pred_weight_table = &slice_params->pred_weight_table;

	/*
	 * CTB size in luma samples and picture width in CTBs. The latter is
	 * used below to turn the slice segment address into X/Y coordinates.
	 */
	log2_max_luma_coding_block_size =
		sps->log2_min_luma_coding_block_size_minus3 + 3 +
		sps->log2_diff_max_min_luma_coding_block_size;
	ctb_size_luma = 1UL << log2_max_luma_coding_block_size;
	width_in_ctb_luma =
		DIV_ROUND_UP(sps->pic_width_in_luma_samples, ctb_size_luma);

	/* MV column buffer size and allocation. */
	if (!ctx->codec.h265.mv_col_buf_size) {
		/* One MV column unit is reserved per buffer of the destination queue. */
		unsigned int num_buffers =
			run->dst->vb2_buf.vb2_queue->num_buffers;

		/*
		 * Each CTB requires a MV col buffer with a specific unit size.
		 * Since the address is given with missing lsb bits, 1 KiB is
		 * added to each buffer to ensure proper alignment.
		 */
		ctx->codec.h265.mv_col_buf_unit_size =
			DIV_ROUND_UP(ctx->src_fmt.width, ctb_size_luma) *
			DIV_ROUND_UP(ctx->src_fmt.height, ctb_size_luma) *
			CEDRUS_H265_MV_COL_BUF_UNIT_CTB_SIZE + SZ_1K;

		ctx->codec.h265.mv_col_buf_size = num_buffers *
			ctx->codec.h265.mv_col_buf_unit_size;

		/* Buffer is never accessed by CPU, so we can skip kernel mapping. */
		ctx->codec.h265.mv_col_buf =
			dma_alloc_attrs(dev->dev,
					ctx->codec.h265.mv_col_buf_size,
					&ctx->codec.h265.mv_col_buf_addr,
					GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
		if (!ctx->codec.h265.mv_col_buf) {
			/* Reset the size so that the allocation is retried on the next run. */
			ctx->codec.h265.mv_col_buf_size = 0;
			// TODO: Abort the process here.
			return;
		}
	}

	/* Activate H265 engine. */
	cedrus_engine_enable(ctx, CEDRUS_CODEC_H265);

	/* Source offset and length in bits. */

	cedrus_write(dev, VE_DEC_H265_BITS_OFFSET, 0);

	reg = slice_params->bit_size;
	cedrus_write(dev, VE_DEC_H265_BITS_LEN, reg);

	/* Source beginning and end addresses. */

	src_buf_addr = vb2_dma_contig_plane_dma_addr(&run->src->vb2_buf, 0);

	/* The source buffer is flagged as holding a single, complete slice. */
	reg = VE_DEC_H265_BITS_ADDR_BASE(src_buf_addr);
	reg |= VE_DEC_H265_BITS_ADDR_VALID_SLICE_DATA;
	reg |= VE_DEC_H265_BITS_ADDR_LAST_SLICE_DATA;
	reg |= VE_DEC_H265_BITS_ADDR_FIRST_SLICE_DATA;

	cedrus_write(dev, VE_DEC_H265_BITS_ADDR, reg);

	/* End address: start plus the slice size rounded up to whole bytes. */
	src_buf_end_addr = src_buf_addr +
			   DIV_ROUND_UP(slice_params->bit_size, 8);

	reg = VE_DEC_H265_BITS_END_ADDR_BASE(src_buf_end_addr);
	cedrus_write(dev, VE_DEC_H265_BITS_END_ADDR, reg);

	/* Coding tree block address */
	reg = VE_DEC_H265_DEC_CTB_ADDR_X(slice_params->slice_segment_addr % width_in_ctb_luma);
	reg |= VE_DEC_H265_DEC_CTB_ADDR_Y(slice_params->slice_segment_addr / width_in_ctb_luma);
	cedrus_write(dev, VE_DEC_H265_DEC_CTB_ADDR, reg);

	/* Tile boundaries are left at zero (tiles are not handled, see the PPS TODO below). */
	cedrus_write(dev, VE_DEC_H265_TILE_START_CTB, 0);
	cedrus_write(dev, VE_DEC_H265_TILE_END_CTB, 0);

	/* Clear the number of correctly-decoded coding tree blocks. */
	if (ctx->fh.m2m_ctx->new_frame)
		cedrus_write(dev, VE_DEC_H265_DEC_CTB_NUM, 0);

	/* Initialize bitstream access. */
	cedrus_write(dev, VE_DEC_H265_TRIGGER, VE_DEC_H265_TRIGGER_INIT_SWDEC);

	/* Skip the bits preceding the slice data, as given by data_bit_offset. */
	cedrus_h265_skip_bits(dev, slice_params->data_bit_offset);

	/* Bitstream parameters. */

	reg = VE_DEC_H265_DEC_NAL_HDR_NAL_UNIT_TYPE(slice_params->nal_unit_type) |
	      VE_DEC_H265_DEC_NAL_HDR_NUH_TEMPORAL_ID_PLUS1(slice_params->nuh_temporal_id_plus1);

	cedrus_write(dev, VE_DEC_H265_DEC_NAL_HDR, reg);

	/* SPS. */

	reg = VE_DEC_H265_DEC_SPS_HDR_MAX_TRANSFORM_HIERARCHY_DEPTH_INTRA(sps->max_transform_hierarchy_depth_intra) |
	      VE_DEC_H265_DEC_SPS_HDR_MAX_TRANSFORM_HIERARCHY_DEPTH_INTER(sps->max_transform_hierarchy_depth_inter) |
	      VE_DEC_H265_DEC_SPS_HDR_LOG2_DIFF_MAX_MIN_TRANSFORM_BLOCK_SIZE(sps->log2_diff_max_min_luma_transform_block_size) |
	      VE_DEC_H265_DEC_SPS_HDR_LOG2_MIN_TRANSFORM_BLOCK_SIZE_MINUS2(sps->log2_min_luma_transform_block_size_minus2) |
	      VE_DEC_H265_DEC_SPS_HDR_LOG2_DIFF_MAX_MIN_LUMA_CODING_BLOCK_SIZE(sps->log2_diff_max_min_luma_coding_block_size) |
	      VE_DEC_H265_DEC_SPS_HDR_LOG2_MIN_LUMA_CODING_BLOCK_SIZE_MINUS3(sps->log2_min_luma_coding_block_size_minus3) |
	      VE_DEC_H265_DEC_SPS_HDR_BIT_DEPTH_CHROMA_MINUS8(sps->bit_depth_chroma_minus8) |
	      VE_DEC_H265_DEC_SPS_HDR_BIT_DEPTH_LUMA_MINUS8(sps->bit_depth_luma_minus8) |
	      VE_DEC_H265_DEC_SPS_HDR_CHROMA_FORMAT_IDC(sps->chroma_format_idc);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_STRONG_INTRA_SMOOTHING_ENABLE,
				V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED,
				sps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_SPS_TEMPORAL_MVP_ENABLED,
				V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED,
				sps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_SAMPLE_ADAPTIVE_OFFSET_ENABLED,
				V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET,
				sps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_AMP_ENABLED,
				V4L2_HEVC_SPS_FLAG_AMP_ENABLED, sps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SPS_HDR_FLAG_SEPARATE_COLOUR_PLANE,
				V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE,
				sps->flags);

	cedrus_write(dev, VE_DEC_H265_DEC_SPS_HDR, reg);

	/* PCM parameters (also taken from the SPS). */
	reg = VE_DEC_H265_DEC_PCM_CTRL_LOG2_DIFF_MAX_MIN_PCM_LUMA_CODING_BLOCK_SIZE(sps->log2_diff_max_min_pcm_luma_coding_block_size) |
	      VE_DEC_H265_DEC_PCM_CTRL_LOG2_MIN_PCM_LUMA_CODING_BLOCK_SIZE_MINUS3(sps->log2_min_pcm_luma_coding_block_size_minus3) |
	      VE_DEC_H265_DEC_PCM_CTRL_PCM_SAMPLE_BIT_DEPTH_CHROMA_MINUS1(sps->pcm_sample_bit_depth_chroma_minus1) |
	      VE_DEC_H265_DEC_PCM_CTRL_PCM_SAMPLE_BIT_DEPTH_LUMA_MINUS1(sps->pcm_sample_bit_depth_luma_minus1);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PCM_CTRL_FLAG_PCM_ENABLED,
				V4L2_HEVC_SPS_FLAG_PCM_ENABLED, sps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PCM_CTRL_FLAG_PCM_LOOP_FILTER_DISABLED,
				V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED,
				sps->flags);

	cedrus_write(dev, VE_DEC_H265_DEC_PCM_CTRL, reg);

	/* PPS. */

	reg = VE_DEC_H265_DEC_PPS_CTRL0_PPS_CR_QP_OFFSET(pps->pps_cr_qp_offset) |
	      VE_DEC_H265_DEC_PPS_CTRL0_PPS_CB_QP_OFFSET(pps->pps_cb_qp_offset) |
	      VE_DEC_H265_DEC_PPS_CTRL0_INIT_QP_MINUS26(pps->init_qp_minus26) |
	      VE_DEC_H265_DEC_PPS_CTRL0_DIFF_CU_QP_DELTA_DEPTH(pps->diff_cu_qp_delta_depth);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL0_FLAG_CU_QP_DELTA_ENABLED,
				V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL0_FLAG_TRANSFORM_SKIP_ENABLED,
				V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL0_FLAG_CONSTRAINED_INTRA_PRED,
				V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL0_FLAG_SIGN_DATA_HIDING_ENABLED,
				V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED,
				pps->flags);

	cedrus_write(dev, VE_DEC_H265_DEC_PPS_CTRL0, reg);

	reg = VE_DEC_H265_DEC_PPS_CTRL1_LOG2_PARALLEL_MERGE_LEVEL_MINUS2(pps->log2_parallel_merge_level_minus2);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED,
				V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED,
				V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_ENTROPY_CODING_SYNC_ENABLED,
				V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED,
				pps->flags);

	/* TODO: VE_DEC_H265_DEC_PPS_CTRL1_FLAG_TILES_ENABLED */

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_TRANSQUANT_BYPASS_ENABLED,
				V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED,
				pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_WEIGHTED_BIPRED,
				V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED, pps->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_PPS_CTRL1_FLAG_WEIGHTED_PRED,
				V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED, pps->flags);

	cedrus_write(dev, VE_DEC_H265_DEC_PPS_CTRL1, reg);

	/* Slice Parameters. */

	reg = VE_DEC_H265_DEC_SLICE_HDR_INFO0_PICTURE_TYPE(slice_params->pic_struct) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO0_FIVE_MINUS_MAX_NUM_MERGE_CAND(slice_params->five_minus_max_num_merge_cand) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO0_NUM_REF_IDX_L1_ACTIVE_MINUS1(slice_params->num_ref_idx_l1_active_minus1) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO0_NUM_REF_IDX_L0_ACTIVE_MINUS1(slice_params->num_ref_idx_l0_active_minus1) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO0_COLLOCATED_REF_IDX(slice_params->collocated_ref_idx) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO0_COLOUR_PLANE_ID(slice_params->colour_plane_id) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO0_SLICE_TYPE(slice_params->slice_type);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_COLLOCATED_FROM_L0,
				V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_CABAC_INIT,
				V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_MVD_L1_ZERO,
				V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_SLICE_SAO_CHROMA,
				V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_SLICE_SAO_LUMA,
				V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_SLICE_TEMPORAL_MVP_ENABLE,
				V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_DEPENDENT_SLICE_SEGMENT,
				V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT,
				slice_params->flags);

	/* The first slice segment flag is derived from the m2m new_frame state. */
	if (ctx->fh.m2m_ctx->new_frame)
		reg |= VE_DEC_H265_DEC_SLICE_HDR_INFO0_FLAG_FIRST_SLICE_SEGMENT_IN_PIC;

	cedrus_write(dev, VE_DEC_H265_DEC_SLICE_HDR_INFO0, reg);

	reg = VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_TC_OFFSET_DIV2(slice_params->slice_tc_offset_div2) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_BETA_OFFSET_DIV2(slice_params->slice_beta_offset_div2) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_POC_BIGEST_IN_RPS_ST(decode_params->num_poc_st_curr_after == 0) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CR_QP_OFFSET(slice_params->slice_cr_qp_offset) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CB_QP_OFFSET(slice_params->slice_cb_qp_offset) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_QP_DELTA(slice_params->slice_qp_delta);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED,
				V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED,
				slice_params->flags);

	reg |= VE_DEC_H265_FLAG(VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED,
				V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED,
				slice_params->flags);

	cedrus_write(dev, VE_DEC_H265_DEC_SLICE_HDR_INFO1, reg);

	/* The chroma denominator is coded as a delta relative to the luma one. */
	chroma_log2_weight_denom = pred_weight_table->luma_log2_weight_denom +
				   pred_weight_table->delta_chroma_log2_weight_denom;
	reg = VE_DEC_H265_DEC_SLICE_HDR_INFO2_NUM_ENTRY_POINT_OFFSETS(0) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO2_CHROMA_LOG2_WEIGHT_DENOM(chroma_log2_weight_denom) |
	      VE_DEC_H265_DEC_SLICE_HDR_INFO2_LUMA_LOG2_WEIGHT_DENOM(pred_weight_table->luma_log2_weight_denom);

	cedrus_write(dev, VE_DEC_H265_DEC_SLICE_HDR_INFO2, reg);

	/* Decoded picture size. */

	reg = VE_DEC_H265_DEC_PIC_SIZE_WIDTH(ctx->src_fmt.width) |
	      VE_DEC_H265_DEC_PIC_SIZE_HEIGHT(ctx->src_fmt.height);

	cedrus_write(dev, VE_DEC_H265_DEC_PIC_SIZE, reg);

	/* Scaling list. */

	if (sps->flags & V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED) {
		cedrus_h265_write_scaling_list(ctx, run);
		reg = VE_DEC_H265_SCALING_LIST_CTRL0_FLAG_ENABLED;
	} else {
		reg = VE_DEC_H265_SCALING_LIST_CTRL0_DEFAULT;
	}
	cedrus_write(dev, VE_DEC_H265_SCALING_LIST_CTRL0, reg);

	/* Neighbor information address. */
	reg = VE_DEC_H265_NEIGHBOR_INFO_ADDR_BASE(ctx->codec.h265.neighbor_info_buf_addr);
	cedrus_write(dev, VE_DEC_H265_NEIGHBOR_INFO_ADDR, reg);

	/* Write decoded picture buffer in pic list. */
	cedrus_h265_frame_info_write_dpb(ctx, decode_params->dpb,
					 decode_params->num_active_dpb_entries);

	/* Output frame. */

	/*
	 * The output frame gets its own frame info slot, at index
	 * V4L2_HEVC_DPB_ENTRIES_NUM_MAX (presumably right past the last
	 * possible DPB slot), with the slice POC used for both fields.
	 */
	output_pic_list_index = V4L2_HEVC_DPB_ENTRIES_NUM_MAX;
	pic_order_cnt[0] = slice_params->slice_pic_order_cnt;
	pic_order_cnt[1] = slice_params->slice_pic_order_cnt;

	cedrus_h265_frame_info_write_single(ctx, output_pic_list_index,
					    slice_params->pic_struct != 0,
					    pic_order_cnt,
					    run->dst->vb2_buf.index);

	cedrus_write(dev, VE_DEC_H265_OUTPUT_FRAME_IDX, output_pic_list_index);

	/* Reference picture list 0 (for P/B frames). */
	if (slice_params->slice_type != V4L2_HEVC_SLICE_TYPE_I) {
		cedrus_h265_ref_pic_list_write(dev, decode_params->dpb,
					       slice_params->ref_idx_l0,
					       slice_params->num_ref_idx_l0_active_minus1 + 1,
					       VE_DEC_H265_SRAM_OFFSET_REF_PIC_LIST0);

		/* L0 weights are needed for either weighted prediction mode. */
		if ((pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED) ||
		    (pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED))
			cedrus_h265_pred_weight_write(dev,
						      pred_weight_table->delta_luma_weight_l0,
						      pred_weight_table->luma_offset_l0,
						      pred_weight_table->delta_chroma_weight_l0,
						      pred_weight_table->chroma_offset_l0,
						      slice_params->num_ref_idx_l0_active_minus1 + 1,
						      VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_LUMA_L0,
						      VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_CHROMA_L0);
	}

	/* Reference picture list 1 (for B frames). */
	if (slice_params->slice_type == V4L2_HEVC_SLICE_TYPE_B) {
		cedrus_h265_ref_pic_list_write(dev, decode_params->dpb,
					       slice_params->ref_idx_l1,
					       slice_params->num_ref_idx_l1_active_minus1 + 1,
					       VE_DEC_H265_SRAM_OFFSET_REF_PIC_LIST1);

		/* L1 weights are only written for weighted bi-prediction. */
		if (pps->flags & V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED)
			cedrus_h265_pred_weight_write(dev,
						      pred_weight_table->delta_luma_weight_l1,
						      pred_weight_table->luma_offset_l1,
						      pred_weight_table->delta_chroma_weight_l1,
						      pred_weight_table->chroma_offset_l1,
						      slice_params->num_ref_idx_l1_active_minus1 + 1,
						      VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_LUMA_L1,
						      VE_DEC_H265_SRAM_OFFSET_PRED_WEIGHT_CHROMA_L1);
	}

	/*
	 * Enable appropriate interruptions. Note that the IRQ mask is written
	 * as the whole register value, not or-ed into the current one.
	 */
	cedrus_write(dev, VE_DEC_H265_CTRL, VE_DEC_H265_CTRL_IRQ_MASK);
}
663 
664 static int cedrus_h265_start(struct cedrus_ctx *ctx)
665 {
666 	struct cedrus_dev *dev = ctx->dev;
667 
668 	/* The buffer size is calculated at setup time. */
669 	ctx->codec.h265.mv_col_buf_size = 0;
670 
671 	/* Buffer is never accessed by CPU, so we can skip kernel mapping. */
672 	ctx->codec.h265.neighbor_info_buf =
673 		dma_alloc_attrs(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE,
674 				&ctx->codec.h265.neighbor_info_buf_addr,
675 				GFP_KERNEL, DMA_ATTR_NO_KERNEL_MAPPING);
676 	if (!ctx->codec.h265.neighbor_info_buf)
677 		return -ENOMEM;
678 
679 	return 0;
680 }
681 
682 static void cedrus_h265_stop(struct cedrus_ctx *ctx)
683 {
684 	struct cedrus_dev *dev = ctx->dev;
685 
686 	if (ctx->codec.h265.mv_col_buf_size > 0) {
687 		dma_free_attrs(dev->dev, ctx->codec.h265.mv_col_buf_size,
688 			       ctx->codec.h265.mv_col_buf,
689 			       ctx->codec.h265.mv_col_buf_addr,
690 			       DMA_ATTR_NO_KERNEL_MAPPING);
691 
692 		ctx->codec.h265.mv_col_buf_size = 0;
693 	}
694 
695 	dma_free_attrs(dev->dev, CEDRUS_H265_NEIGHBOR_INFO_BUF_SIZE,
696 		       ctx->codec.h265.neighbor_info_buf,
697 		       ctx->codec.h265.neighbor_info_buf_addr,
698 		       DMA_ATTR_NO_KERNEL_MAPPING);
699 }
700 
701 static void cedrus_h265_trigger(struct cedrus_ctx *ctx)
702 {
703 	struct cedrus_dev *dev = ctx->dev;
704 
705 	cedrus_write(dev, VE_DEC_H265_TRIGGER, VE_DEC_H265_TRIGGER_DEC_SLICE);
706 }
707 
708 struct cedrus_dec_ops cedrus_dec_ops_h265 = {
709 	.irq_clear	= cedrus_h265_irq_clear,
710 	.irq_disable	= cedrus_h265_irq_disable,
711 	.irq_status	= cedrus_h265_irq_status,
712 	.setup		= cedrus_h265_setup,
713 	.start		= cedrus_h265_start,
714 	.stop		= cedrus_h265_stop,
715 	.trigger	= cedrus_h265_trigger,
716 };
717