1 /*
2 * Copyright(c) 2019 Intel Corporation
3 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
4 *
5 * This source code is subject to the terms of the BSD 2 Clause License and
6 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
7 * was not distributed with this source code in the LICENSE file, you can
8 * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
9 * Media Patent License 1.0 was not distributed with this source code in the
10 * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
11 */
12 #include <string.h>
13
14 #include "EbCodingLoop.h"
15 #include "EbUtility.h"
16 #include "EbTransformUnit.h"
17 #include "EbRateDistortionCost.h"
18 #include "EbDeblockingFilter.h"
19 #include "EbPictureOperators.h"
20 #include "EbSegmentation.h"
21 #include "EbEncDecProcess.h"
22 #include "EbSvtAv1ErrorCodes.h"
23 #include "EbTransforms.h"
24 #include "EbInvTransforms.h"
25 #include "EbModeDecisionConfigurationProcess.h"
26 #include "EbEncIntraPrediction.h"
27 #include "aom_dsp_rtcd.h"
28 #include "EbMdRateEstimation.h"
29 #include "EbFullLoop.h"
30 void av1_set_ref_frame(MvReferenceFrame *rf, int8_t ref_frame_type);
31 uint8_t av1_drl_ctx(const CandidateMv *ref_mv_stack, int32_t ref_idx);
32
33 /*******************************************
34 * set Penalize Skip Flag
35 *
36 * Summary: Set the penalize_skipflag to true
37 * When there is luminance/chrominance change
* or in noisy clip with low motion at medium
* variance area
40 *
41 *******************************************/
42
/* Areas of N x N squares (N = 32, 16, 8, 4).
 * Each replacement list is parenthesized so the macro acts as a single value
 * when it appears next to higher/equal-precedence operators, e.g. `n / S32`
 * or `n % S8`; without the parentheses those would expand to
 * `n / 32 * 32` and `n % 8 * 8`. */
#define S32 (32 * 32)
#define S16 (16 * 16)
#define S8 (8 * 8)
#define S4 (4 * 4)
47
48 typedef void (*EbAv1EncodeLoopFuncPtr)(PictureControlSet *pcs_ptr, EncDecContext *context_ptr,
49 SuperBlock *sb_ptr, uint32_t origin_x, uint32_t origin_y,
50 EbPictureBufferDesc *pred_samples, // no basis/offset
51 EbPictureBufferDesc *coeff_samples_sb, // sb based
52 EbPictureBufferDesc *residual16bit, // no basis/offset
53 EbPictureBufferDesc *transform16bit, // no basis/offset
54 EbPictureBufferDesc *inverse_quant_buffer,
55 uint32_t *count_non_zero_coeffs, uint32_t component_mask,
56 uint16_t *eob);
57
58
59 typedef void (*EbAv1GenerateReconFuncPtr)(EncDecContext *context_ptr, uint32_t origin_x,
60 uint32_t origin_y,
61 EbPictureBufferDesc *pred_samples, // no basis/offset
62 EbPictureBufferDesc *residual16bit, // no basis/offset
63 uint32_t component_mask, uint16_t *eob);
64
65 /*******************************************
66 * Residual Kernel 8-16bit
67 Computes the residual data
68 *******************************************/
residual_kernel(uint8_t * input,uint32_t input_offset,uint32_t input_stride,uint8_t * pred,uint32_t pred_offset,uint32_t pred_stride,int16_t * residual,uint32_t residual_offset,uint32_t residual_stride,EbBool hbd,uint32_t area_width,uint32_t area_height)69 void residual_kernel(uint8_t *input, uint32_t input_offset, uint32_t input_stride, uint8_t *pred,
70 uint32_t pred_offset, uint32_t pred_stride, int16_t *residual,
71 uint32_t residual_offset, uint32_t residual_stride, EbBool hbd,
72 uint32_t area_width, uint32_t area_height) {
73 if (hbd) {
74 svt_residual_kernel16bit(((uint16_t *)input) + input_offset,
75 input_stride,
76 ((uint16_t *)pred) + pred_offset,
77 pred_stride,
78 residual + residual_offset,
79 residual_stride,
80 area_width,
81 area_height);
82 } else {
83 svt_residual_kernel8bit(&(input[input_offset]),
84 input_stride,
85 &(pred[pred_offset]),
86 pred_stride,
87 residual + residual_offset,
88 residual_stride,
89 area_width,
90 area_height);
91 }
92 }
93
94 /***************************************************
95 * Update Intra Mode Neighbor Arrays
96 ***************************************************/
encode_pass_update_intra_mode_neighbor_arrays(NeighborArrayUnit * mode_type_neighbor_array,NeighborArrayUnit * intra_luma_mode_neighbor_array,NeighborArrayUnit * intra_chroma_mode_neighbor_array,uint8_t luma_mode,uint8_t chroma_mode,uint32_t origin_x,uint32_t origin_y,uint32_t width,uint32_t height,uint32_t width_uv,uint32_t height_uv,uint32_t component_mask)97 static void encode_pass_update_intra_mode_neighbor_arrays(
98 NeighborArrayUnit *mode_type_neighbor_array, NeighborArrayUnit *intra_luma_mode_neighbor_array,
99 NeighborArrayUnit *intra_chroma_mode_neighbor_array, uint8_t luma_mode, uint8_t chroma_mode,
100 uint32_t origin_x, uint32_t origin_y, uint32_t width, uint32_t height, uint32_t width_uv,
101 uint32_t height_uv, uint32_t component_mask) {
102 uint8_t mode_type = INTRA_MODE;
103
104 if (component_mask & PICTURE_BUFFER_DESC_LUMA_MASK) {
105 // Mode Type Update
106 neighbor_array_unit_mode_write(mode_type_neighbor_array,
107 &mode_type,
108 origin_x,
109 origin_y,
110 width,
111 height,
112 NEIGHBOR_ARRAY_UNIT_FULL_MASK);
113
114 // Intra Luma Mode Update
115 neighbor_array_unit_mode_write(intra_luma_mode_neighbor_array,
116 &luma_mode,
117 origin_x,
118 origin_y,
119 width,
120 height,
121 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
122 }
123 if (component_mask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
124 // Intra Luma Mode Update
125 neighbor_array_unit_mode_write(intra_chroma_mode_neighbor_array,
126 &chroma_mode,
127 ((origin_x >> 3) << 3) / 2,
128 ((origin_y >> 3) << 3) / 2,
129 width_uv,
130 height_uv,
131 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
132 }
133
134 return;
135 }
136
137 /***************************************************
138 * Update Inter Mode Neighbor Arrays
139 ***************************************************/
encode_pass_update_inter_mode_neighbor_arrays(NeighborArrayUnit * mode_type_neighbor_array,NeighborArrayUnit * mv_neighbor_array,NeighborArrayUnit * skipNeighborArray,MvUnit * mv_unit,uint8_t * skip_flag,uint32_t origin_x,uint32_t origin_y,uint32_t bwidth,uint32_t bheight)140 static void encode_pass_update_inter_mode_neighbor_arrays(
141 NeighborArrayUnit *mode_type_neighbor_array, NeighborArrayUnit *mv_neighbor_array,
142 NeighborArrayUnit *skipNeighborArray, MvUnit *mv_unit, uint8_t *skip_flag, uint32_t origin_x,
143 uint32_t origin_y, uint32_t bwidth, uint32_t bheight) {
144 uint8_t mode_type = INTER_MODE;
145
146 // Mode Type Update
147 neighbor_array_unit_mode_write(mode_type_neighbor_array,
148 &mode_type,
149 origin_x,
150 origin_y,
151 bwidth,
152 bheight,
153 NEIGHBOR_ARRAY_UNIT_FULL_MASK);
154
155 // Motion Vector Unit
156 neighbor_array_unit_mode_write(mv_neighbor_array,
157 (uint8_t *)mv_unit,
158 origin_x,
159 origin_y,
160 bwidth,
161 bheight,
162 NEIGHBOR_ARRAY_UNIT_FULL_MASK);
163
164 // Skip Flag
165 neighbor_array_unit_mode_write(skipNeighborArray,
166 skip_flag,
167 origin_x,
168 origin_y,
169 bwidth,
170 bheight,
171 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
172
173 return;
174 }
175
176 /***************************************************
177 * Update Recon Samples Neighbor Arrays
178 ***************************************************/
encode_pass_update_recon_sample_neighbour_arrays(NeighborArrayUnit * lumaReconSampleNeighborArray,NeighborArrayUnit * cbReconSampleNeighborArray,NeighborArrayUnit * crReconSampleNeighborArray,EbPictureBufferDesc * recon_buffer,uint32_t origin_x,uint32_t origin_y,uint32_t width,uint32_t height,uint32_t bwidth_uv,uint32_t bheight_uv,uint32_t component_mask,EbBool is_16bit)179 static void encode_pass_update_recon_sample_neighbour_arrays(
180 NeighborArrayUnit *lumaReconSampleNeighborArray, NeighborArrayUnit *cbReconSampleNeighborArray,
181 NeighborArrayUnit *crReconSampleNeighborArray, EbPictureBufferDesc *recon_buffer,
182 uint32_t origin_x, uint32_t origin_y, uint32_t width, uint32_t height, uint32_t bwidth_uv,
183 uint32_t bheight_uv, uint32_t component_mask, EbBool is_16bit) {
184 uint32_t round_origin_x = (origin_x >> 3) << 3; // for Chroma blocks with size of 4
185 uint32_t round_origin_y = (origin_y >> 3) << 3; // for Chroma blocks with size of 4
186
187 if (is_16bit == EB_TRUE) {
188 if (component_mask & PICTURE_BUFFER_DESC_LUMA_MASK) {
189 // Recon Samples - Luma
190 neighbor_array_unit16bit_sample_write(lumaReconSampleNeighborArray,
191 (uint16_t *)(recon_buffer->buffer_y),
192 recon_buffer->stride_y,
193 recon_buffer->origin_x + origin_x,
194 recon_buffer->origin_y + origin_y,
195 origin_x,
196 origin_y,
197 width,
198 height,
199 NEIGHBOR_ARRAY_UNIT_FULL_MASK);
200 }
201
202 if (component_mask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
203 // Recon Samples - Cb
204 neighbor_array_unit16bit_sample_write(cbReconSampleNeighborArray,
205 (uint16_t *)(recon_buffer->buffer_cb),
206 recon_buffer->stride_cb,
207 (recon_buffer->origin_x + round_origin_x) >> 1,
208 (recon_buffer->origin_y + round_origin_y) >> 1,
209 round_origin_x >> 1,
210 round_origin_y >> 1,
211 bwidth_uv,
212 bheight_uv,
213 NEIGHBOR_ARRAY_UNIT_FULL_MASK);
214
215 // Recon Samples - Cr
216 neighbor_array_unit16bit_sample_write(crReconSampleNeighborArray,
217 (uint16_t *)(recon_buffer->buffer_cr),
218 recon_buffer->stride_cr,
219 (recon_buffer->origin_x + round_origin_x) >> 1,
220 (recon_buffer->origin_y + round_origin_y) >> 1,
221 round_origin_x >> 1,
222 round_origin_y >> 1,
223 bwidth_uv,
224 bheight_uv,
225 NEIGHBOR_ARRAY_UNIT_FULL_MASK);
226 }
227 } else {
228 if (component_mask & PICTURE_BUFFER_DESC_LUMA_MASK) {
229 // Recon Samples - Luma
230 neighbor_array_unit_sample_write(lumaReconSampleNeighborArray,
231 recon_buffer->buffer_y,
232 recon_buffer->stride_y,
233 recon_buffer->origin_x + origin_x,
234 recon_buffer->origin_y + origin_y,
235 origin_x,
236 origin_y,
237 width,
238 height,
239 NEIGHBOR_ARRAY_UNIT_FULL_MASK);
240 }
241
242 if (component_mask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
243 // Recon Samples - Cb
244 neighbor_array_unit_sample_write(cbReconSampleNeighborArray,
245 recon_buffer->buffer_cb,
246 recon_buffer->stride_cb,
247 (recon_buffer->origin_x + round_origin_x) >> 1,
248 (recon_buffer->origin_y + round_origin_y) >> 1,
249 round_origin_x >> 1,
250 round_origin_y >> 1,
251 bwidth_uv,
252 bheight_uv,
253 NEIGHBOR_ARRAY_UNIT_FULL_MASK);
254
255 // Recon Samples - Cr
256 neighbor_array_unit_sample_write(crReconSampleNeighborArray,
257 recon_buffer->buffer_cr,
258 recon_buffer->stride_cr,
259 (recon_buffer->origin_x + round_origin_x) >> 1,
260 (recon_buffer->origin_y + round_origin_y) >> 1,
261 round_origin_x >> 1,
262 round_origin_y >> 1,
263 bwidth_uv,
264 bheight_uv,
265 NEIGHBOR_ARRAY_UNIT_FULL_MASK);
266 }
267 }
268
269 return;
270 }
271
272 /**********************************************************
273 * Encode Loop
274 *
275 * Summary: Performs an AV1 conformant
276 * Transform, Quantization and Inverse Quantization of a TU.
277 *
278 * Inputs:
279 * origin_x
280 * origin_y
281 * txb_size
282 * sb_sz
283 * input - input samples (position sensitive)
284 * pred - prediction samples (position independent)
285 *
286 * Outputs:
287 * Inverse quantized coeff - quantization indices (position sensitive)
288 *
289 **********************************************************/
av1_encode_loop(PictureControlSet * pcs_ptr,EncDecContext * context_ptr,SuperBlock * sb_ptr,uint32_t origin_x,uint32_t origin_y,EbPictureBufferDesc * pred_samples,EbPictureBufferDesc * coeff_samples_sb,EbPictureBufferDesc * residual16bit,EbPictureBufferDesc * transform16bit,EbPictureBufferDesc * inverse_quant_buffer,uint32_t * count_non_zero_coeffs,uint32_t component_mask,uint16_t * eob)290 static void av1_encode_loop(PictureControlSet *pcs_ptr, EncDecContext *context_ptr,
291 SuperBlock * sb_ptr,
292 uint32_t origin_x, //pic based tx org x
293 uint32_t origin_y, //pic based tx org y
294 EbPictureBufferDesc *pred_samples, // no basis/offset
295 EbPictureBufferDesc *coeff_samples_sb, // sb based
296 EbPictureBufferDesc *residual16bit, // no basis/offset
297 EbPictureBufferDesc *transform16bit, // no basis/offset
298 EbPictureBufferDesc *inverse_quant_buffer,
299 uint32_t *count_non_zero_coeffs,
300 uint32_t component_mask, uint16_t *eob) {
301
302 // uint32_t chroma_qp = cb_qp;
303 BlkStruct * blk_ptr = context_ptr->blk_ptr;
304 TransformUnit *txb_ptr = &blk_ptr->txb_array[context_ptr->txb_itr];
305 // EB_SLICE slice_type = sb_ptr->pcs_ptr->slice_type;
306 // uint32_t temporal_layer_index = sb_ptr->pcs_ptr->temporal_layer_index;
307 uint32_t qindex = blk_ptr->qindex;
308 EbPictureBufferDesc *input_samples = context_ptr->input_samples;
309
310 uint32_t round_origin_x = (origin_x >> 3) << 3; // for Chroma blocks with size of 4
311 uint32_t round_origin_y = (origin_y >> 3) << 3; // for Chroma blocks with size of 4
312
313 const uint32_t input_luma_offset =
314 ((origin_y + input_samples->origin_y) * input_samples->stride_y) +
315 (origin_x + input_samples->origin_x);
316 const uint32_t input_cb_offset =
317 (((round_origin_y + input_samples->origin_y) >> 1) * input_samples->stride_cb) +
318 ((round_origin_x + input_samples->origin_x) >> 1);
319 const uint32_t input_cr_offset =
320 (((round_origin_y + input_samples->origin_y) >> 1) * input_samples->stride_cr) +
321 ((round_origin_x + input_samples->origin_x) >> 1);
322 const uint32_t pred_luma_offset =
323 ((pred_samples->origin_y + origin_y) * pred_samples->stride_y) +
324 (pred_samples->origin_x + origin_x);
325 const uint32_t pred_cb_offset =
326 (((pred_samples->origin_y + round_origin_y) >> 1) * pred_samples->stride_cb) +
327 ((pred_samples->origin_x + round_origin_x) >> 1);
328 const uint32_t pred_cr_offset =
329 (((pred_samples->origin_y + round_origin_y) >> 1) * pred_samples->stride_cr) +
330 ((pred_samples->origin_x + round_origin_x) >> 1);
331 int32_t is_inter = (blk_ptr->prediction_mode_flag == INTER_MODE || blk_ptr->use_intrabc)
332 ? EB_TRUE
333 : EB_FALSE;
334 const uint32_t scratch_luma_offset =
335 context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] +
336 context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] *
337 SB_STRIDE_Y;
338 const uint32_t scratch_cb_offset =
339 ROUND_UV(
340 context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
341 2 +
342 ROUND_UV(
343 context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
344 2 * SB_STRIDE_UV;
345 const uint32_t scratch_cr_offset =
346 ROUND_UV(
347 context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
348 2 +
349 ROUND_UV(
350 context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
351 2 * SB_STRIDE_UV;
352 const uint32_t coeff1d_offset = context_ptr->coded_area_sb;
353
354 const uint32_t coeff1d_offset_chroma = context_ptr->coded_area_sb_uv;
355 UNUSED(coeff1d_offset_chroma);
356
357 context_ptr->three_quad_energy = 0;
358 if (pcs_ptr->parent_pcs_ptr->blk_lambda_tuning) {
359 context_ptr->md_context->blk_geom = context_ptr->blk_geom;
360 context_ptr->md_context->blk_origin_x = context_ptr->blk_origin_x;
361 context_ptr->md_context->blk_origin_y = context_ptr->blk_origin_y;
362 //Get the new lambda for current block
363 set_tuned_blk_lambda(context_ptr->md_context, pcs_ptr);
364 }
365 //**********************************
366 // Luma
367 //**********************************
368 if (component_mask == PICTURE_BUFFER_DESC_FULL_MASK ||
369 component_mask == PICTURE_BUFFER_DESC_LUMA_MASK) {
370 if (context_ptr->md_skip_blk) {
371 count_non_zero_coeffs[0] = 0;
372 eob[0] = 0;
373 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[0][context_ptr->txb_itr] = 0;
374 }
375 else {
376 svt_residual_kernel8bit(
377 input_samples->buffer_y + input_luma_offset,
378 input_samples->stride_y,
379 pred_samples->buffer_y + pred_luma_offset,
380 pred_samples->stride_y,
381 ((int16_t *)residual16bit->buffer_y) + scratch_luma_offset,
382 residual16bit->stride_y,
383 context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
384 context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr]);
385
386 av1_estimate_transform(
387 ((int16_t *)residual16bit->buffer_y) + scratch_luma_offset,
388 residual16bit->stride_y,
389 ((TranLow *)transform16bit->buffer_y) + coeff1d_offset,
390 NOT_USED_VALUE,
391 context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
392 &context_ptr->three_quad_energy,
393 EB_8BIT,
394 txb_ptr->transform_type[PLANE_TYPE_Y],
395 PLANE_TYPE_Y,
396 context_ptr->md_context->pf_ctrls.pf_shape);
397
398 int32_t seg_qp = pcs_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled
399 ? pcs_ptr->parent_pcs_ptr->frm_hdr.segmentation_params
400 .feature_data[context_ptr->blk_ptr->segment_id][SEG_LVL_ALT_Q]
401 : 0;
402
403 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[0][context_ptr->txb_itr] = av1_quantize_inv_quantize(
404 sb_ptr->pcs_ptr,
405 context_ptr->md_context,
406 ((TranLow *)transform16bit->buffer_y) + coeff1d_offset,
407 NOT_USED_VALUE,
408 ((int32_t *)coeff_samples_sb->buffer_y) + coeff1d_offset,
409 ((int32_t *)inverse_quant_buffer->buffer_y) + coeff1d_offset,
410 qindex,
411 seg_qp,
412 context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
413 context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr],
414 context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
415 &eob[0],
416 &(count_non_zero_coeffs[0]),
417 COMPONENT_LUMA,
418 EB_8BIT,
419 txb_ptr->transform_type[PLANE_TYPE_Y],
420 &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
421 context_ptr->md_context->luma_txb_skip_context,
422 context_ptr->md_context->luma_dc_sign_context,
423 blk_ptr->pred_mode,
424 blk_ptr->use_intrabc,
425 context_ptr->md_context->full_lambda_md[EB_8_BIT_MD],
426 EB_TRUE);
427
428 }
429 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].y_has_coeff[context_ptr->txb_itr] = count_non_zero_coeffs[0] ? EB_TRUE : EB_FALSE;
430
431 if (count_non_zero_coeffs[0] == 0) {
432 // INTER. Chroma follows Luma in transform type
433 if (blk_ptr->prediction_mode_flag == INTER_MODE) {
434 txb_ptr->transform_type[PLANE_TYPE_Y] = DCT_DCT;
435 txb_ptr->transform_type[PLANE_TYPE_UV] = DCT_DCT;
436 } else { // INTRA
437 txb_ptr->transform_type[PLANE_TYPE_Y] = DCT_DCT;
438 }
439 }
440 txb_ptr->nz_coef_count[0] = (uint16_t)count_non_zero_coeffs[0];
441 }
442
443 if (component_mask == PICTURE_BUFFER_DESC_FULL_MASK ||
444 component_mask == PICTURE_BUFFER_DESC_CHROMA_MASK) {
445 if (blk_ptr->prediction_mode_flag == INTRA_MODE && blk_ptr->prediction_unit_array->intra_chroma_mode == UV_CFL_PRED) {
446 EbPictureBufferDesc *recon_samples = pred_samples;
447 uint32_t recon_luma_offset =
448 (recon_samples->origin_y + round_origin_y) * recon_samples->stride_y +
449 (recon_samples->origin_x + round_origin_x);
450
451 // Down sample Luma
452 svt_cfl_luma_subsampling_420_lbd(
453 recon_samples->buffer_y + recon_luma_offset,
454 recon_samples->stride_y,
455 context_ptr->md_context->pred_buf_q3,
456 context_ptr->blk_geom->bwidth_uv == context_ptr->blk_geom->bwidth
457 ? (context_ptr->blk_geom->bwidth_uv << 1)
458 : context_ptr->blk_geom->bwidth,
459 context_ptr->blk_geom->bheight_uv == context_ptr->blk_geom->bheight
460 ? (context_ptr->blk_geom->bheight_uv << 1)
461 : context_ptr->blk_geom->bheight);
462 int32_t round_offset =
463 ((context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr]) *
464 (context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr])) /
465 2;
466
467 svt_subtract_average(
468 context_ptr->md_context->pred_buf_q3,
469 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
470 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
471 round_offset,
472 svt_log2f(context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr]) +
473 svt_log2f(context_ptr->blk_geom
474 ->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]));
475 if (blk_ptr->prediction_unit_array->intra_chroma_mode == UV_CFL_PRED) {
476 int32_t alpha_q3 = cfl_idx_to_alpha(blk_ptr->prediction_unit_array->cfl_alpha_idx,
477 blk_ptr->prediction_unit_array->cfl_alpha_signs,
478 CFL_PRED_U); // once for U, once for V
479
480 //TOCHANGE
481 //assert(chroma_size * CFL_BUF_LINE + chroma_size <= CFL_BUF_SQUARE);
482
483 svt_cfl_predict_lbd(
484 context_ptr->md_context->pred_buf_q3,
485 pred_samples->buffer_cb + pred_cb_offset,
486 pred_samples->stride_cb,
487 pred_samples->buffer_cb + pred_cb_offset,
488 pred_samples->stride_cb,
489 alpha_q3,
490 8,
491 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
492 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]);
493 alpha_q3 = cfl_idx_to_alpha(blk_ptr->prediction_unit_array->cfl_alpha_idx,
494 blk_ptr->prediction_unit_array->cfl_alpha_signs,
495 CFL_PRED_V); // once for U, once for V
496
497 //TOCHANGE
498 //assert(chroma_size * CFL_BUF_LINE + chroma_size <= CFL_BUF_SQUARE);
499
500 svt_cfl_predict_lbd(
501 context_ptr->md_context->pred_buf_q3,
502 pred_samples->buffer_cr + pred_cr_offset,
503 pred_samples->stride_cr,
504 pred_samples->buffer_cr + pred_cr_offset,
505 pred_samples->stride_cr,
506 alpha_q3,
507 8,
508 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
509 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]);
510 }
511 }
512
513 //**********************************
514 // Chroma
515 //**********************************
516 if (context_ptr->md_skip_blk) {
517 count_non_zero_coeffs[1] = 0;
518 eob[1] = 0;
519 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[1][context_ptr->txb_itr] = 0;
520 count_non_zero_coeffs[2] = 0;
521 eob[2] = 0;
522 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[2][context_ptr->txb_itr] = 0;
523 }
524 else {
525
526 int32_t seg_qp = pcs_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled
527 ? pcs_ptr->parent_pcs_ptr->frm_hdr.segmentation_params
528 .feature_data[context_ptr->blk_ptr->segment_id][SEG_LVL_ALT_Q]
529 : 0;
530 //**********************************
531 // Cb
532 //**********************************
533 svt_residual_kernel8bit(
534 input_samples->buffer_cb + input_cb_offset,
535 input_samples->stride_cb,
536 pred_samples->buffer_cb + pred_cb_offset,
537 pred_samples->stride_cb,
538 ((int16_t *)residual16bit->buffer_cb) + scratch_cb_offset,
539 residual16bit->stride_cb,
540 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
541 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]);
542
543 av1_estimate_transform(
544 ((int16_t *)residual16bit->buffer_cb) + scratch_cb_offset,
545 residual16bit->stride_cb,
546 ((TranLow *)transform16bit->buffer_cb) + context_ptr->coded_area_sb_uv,
547 NOT_USED_VALUE,
548 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
549 &context_ptr->three_quad_energy,
550 EB_8BIT,
551 txb_ptr->transform_type[PLANE_TYPE_UV],
552 PLANE_TYPE_UV,
553 context_ptr->md_context->pf_ctrls.pf_shape);
554 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[1][context_ptr->txb_itr] = av1_quantize_inv_quantize(
555 sb_ptr->pcs_ptr,
556 context_ptr->md_context,
557 ((TranLow *)transform16bit->buffer_cb) + context_ptr->coded_area_sb_uv,
558 NOT_USED_VALUE,
559 ((int32_t *)coeff_samples_sb->buffer_cb) + context_ptr->coded_area_sb_uv,
560 ((int32_t *)inverse_quant_buffer->buffer_cb) + context_ptr->coded_area_sb_uv,
561 qindex,
562 seg_qp,
563 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
564 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
565 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
566 &eob[1],
567 &(count_non_zero_coeffs[1]),
568 COMPONENT_CHROMA_CB,
569 EB_8BIT,
570 txb_ptr->transform_type[PLANE_TYPE_UV],
571 &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
572 context_ptr->md_context->cb_txb_skip_context,
573 context_ptr->md_context->cb_dc_sign_context,
574 blk_ptr->pred_mode,
575 blk_ptr->use_intrabc,
576 context_ptr->md_context->full_lambda_md[EB_8_BIT_MD],
577 EB_TRUE);
578
579 //**********************************
580 // Cr
581 //**********************************
582 svt_residual_kernel8bit(
583 input_samples->buffer_cr + input_cr_offset,
584 input_samples->stride_cr,
585 pred_samples->buffer_cr + pred_cr_offset,
586 pred_samples->stride_cr,
587 ((int16_t *)residual16bit->buffer_cr) + scratch_cr_offset,
588 residual16bit->stride_cr,
589 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
590 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]);
591 av1_estimate_transform(
592 ((int16_t *)residual16bit->buffer_cr) + scratch_cb_offset,
593 residual16bit->stride_cr,
594 ((TranLow *)transform16bit->buffer_cr) + context_ptr->coded_area_sb_uv,
595 NOT_USED_VALUE,
596 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
597 &context_ptr->three_quad_energy,
598 EB_8BIT,
599 txb_ptr->transform_type[PLANE_TYPE_UV],
600 PLANE_TYPE_UV,
601 context_ptr->md_context->pf_ctrls.pf_shape);
602 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[2][context_ptr->txb_itr] = av1_quantize_inv_quantize(
603 sb_ptr->pcs_ptr,
604 context_ptr->md_context,
605 ((TranLow *)transform16bit->buffer_cr) + context_ptr->coded_area_sb_uv,
606 NOT_USED_VALUE,
607 ((int32_t *)coeff_samples_sb->buffer_cr) + context_ptr->coded_area_sb_uv,
608 ((TranLow *)inverse_quant_buffer->buffer_cr) + context_ptr->coded_area_sb_uv,
609 qindex,
610 seg_qp,
611 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
612 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
613 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
614 &eob[2],
615 &(count_non_zero_coeffs[2]),
616 COMPONENT_CHROMA_CR,
617 EB_8BIT,
618 txb_ptr->transform_type[PLANE_TYPE_UV],
619 &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
620 context_ptr->md_context->cr_txb_skip_context,
621 context_ptr->md_context->cr_dc_sign_context,
622 blk_ptr->pred_mode,
623 blk_ptr->use_intrabc,
624 context_ptr->md_context->full_lambda_md[EB_8_BIT_MD],
625 EB_TRUE);
626 }
627
628 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].u_has_coeff[context_ptr->txb_itr] = count_non_zero_coeffs[1] ? EB_TRUE : EB_FALSE;
629 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].v_has_coeff[context_ptr->txb_itr] = count_non_zero_coeffs[2] ? EB_TRUE : EB_FALSE;
630
631 txb_ptr->nz_coef_count[1] = (uint16_t)count_non_zero_coeffs[1];
632 txb_ptr->nz_coef_count[2] = (uint16_t)count_non_zero_coeffs[2];
633 }
634 return;
635 }
636 /**********************************************************
637 * Encode Loop
638 *
639 * Summary: Performs an AV1 conformant
640 * Transform, Quantization and Inverse Quantization of a TU.
641 *
642 * Inputs:
643 * origin_x
644 * origin_y
645 * txb_size
646 * sb_sz
647 * input - input samples (position sensitive)
648 * pred - prediction samples (position independent)
649 *
650 * Outputs:
651 * Inverse quantized coeff - quantization indices (position sensitive)
652 *
653 **********************************************************/
av1_encode_loop_16bit(PictureControlSet * pcs_ptr,EncDecContext * context_ptr,SuperBlock * sb_ptr,uint32_t origin_x,uint32_t origin_y,EbPictureBufferDesc * pred_samples,EbPictureBufferDesc * coeff_samples_sb,EbPictureBufferDesc * residual16bit,EbPictureBufferDesc * transform16bit,EbPictureBufferDesc * inverse_quant_buffer,uint32_t * count_non_zero_coeffs,uint32_t component_mask,uint16_t * eob)654 static void av1_encode_loop_16bit(PictureControlSet *pcs_ptr, EncDecContext *context_ptr,
655 SuperBlock *sb_ptr, uint32_t origin_x, uint32_t origin_y,
656 EbPictureBufferDesc *pred_samples, // no basis/offset
657 EbPictureBufferDesc *coeff_samples_sb, // sb based
658 EbPictureBufferDesc *residual16bit, // no basis/offset
659 EbPictureBufferDesc *transform16bit, // no basis/offset
660 EbPictureBufferDesc *inverse_quant_buffer,
661 uint32_t *count_non_zero_coeffs, uint32_t component_mask,
662 uint16_t *eob)
663
664 {
665
666 BlkStruct * blk_ptr = context_ptr->blk_ptr;
667 TransformUnit *txb_ptr = &blk_ptr->txb_array[context_ptr->txb_itr];
668 // EB_SLICE slice_type = sb_ptr->pcs_ptr->slice_type;
669 // uint32_t temporal_layer_index = sb_ptr->pcs_ptr->temporal_layer_index;
670 uint32_t qindex = blk_ptr->qindex;
671 uint32_t bit_depth = context_ptr->bit_depth;
672 EbPictureBufferDesc *input_samples16bit = context_ptr->input_sample16bit_buffer;
673 EbPictureBufferDesc *pred_samples16bit = pred_samples;
674 uint32_t round_origin_x = (origin_x >> 3) << 3; // for Chroma blocks with size of 4
675 uint32_t round_origin_y = (origin_y >> 3) << 3; // for Chroma blocks with size of 4
676
677 int32_t is_inter = (blk_ptr->prediction_mode_flag == INTER_MODE || blk_ptr->use_intrabc)
678 ? EB_TRUE
679 : EB_FALSE;
680 const uint32_t input_luma_offset =
681 context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] +
682 context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] *
683 SB_STRIDE_Y;
684 const uint32_t input_cb_offset =
685 ROUND_UV(
686 context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
687 2 +
688 ROUND_UV(
689 context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
690 2 * SB_STRIDE_UV;
691 const uint32_t input_cr_offset =
692 ROUND_UV(
693 context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
694 2 +
695 ROUND_UV(
696 context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
697 2 * SB_STRIDE_UV;
698 const uint32_t pred_luma_offset =
699 ((pred_samples16bit->origin_y + origin_y) * pred_samples16bit->stride_y) +
700 (pred_samples16bit->origin_x + origin_x);
701 const uint32_t pred_cb_offset =
702 (((pred_samples16bit->origin_y + round_origin_y) >> 1) * pred_samples16bit->stride_cb) +
703 ((pred_samples16bit->origin_x + round_origin_x) >> 1);
704 const uint32_t pred_cr_offset =
705 (((pred_samples16bit->origin_y + round_origin_y) >> 1) * pred_samples16bit->stride_cr) +
706 ((pred_samples16bit->origin_x + round_origin_x) >> 1);
707
708 uint32_t scratch_luma_offset, scratch_cb_offset, scratch_cr_offset;
709
710 if (bit_depth != EB_8BIT) {
711 scratch_luma_offset =
712 context_ptr->blk_geom->origin_x + context_ptr->blk_geom->origin_y * SB_STRIDE_Y;
713 scratch_cb_offset = ROUND_UV(context_ptr->blk_geom->origin_x) / 2 +
714 ROUND_UV(context_ptr->blk_geom->origin_y) / 2 * SB_STRIDE_UV;
715 scratch_cr_offset = ROUND_UV(context_ptr->blk_geom->origin_x) / 2 +
716 ROUND_UV(context_ptr->blk_geom->origin_y) / 2 * SB_STRIDE_UV;
717 }
718 else {
719 scratch_luma_offset =
720 context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] +
721 context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] *
722 SB_STRIDE_Y;
723 scratch_cb_offset =
724 ROUND_UV(context_ptr->blk_geom
725 ->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
726 2 +
727 ROUND_UV(context_ptr->blk_geom
728 ->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
729 2 * SB_STRIDE_UV;
730 scratch_cr_offset =
731 ROUND_UV(context_ptr->blk_geom
732 ->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
733 2 +
734 ROUND_UV(context_ptr->blk_geom
735 ->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
736 2 * SB_STRIDE_UV;
737 context_ptr->three_quad_energy = 0;
738 }
739 const uint32_t coeff1d_offset = context_ptr->coded_area_sb;
740 const uint32_t coeff1d_offset_chroma = context_ptr->coded_area_sb_uv;
741 UNUSED(coeff1d_offset_chroma);
742
743 if (pcs_ptr->parent_pcs_ptr->blk_lambda_tuning) {
744 context_ptr->md_context->blk_geom = context_ptr->blk_geom;
745 context_ptr->md_context->blk_origin_x = context_ptr->blk_origin_x;
746 context_ptr->md_context->blk_origin_y = context_ptr->blk_origin_y;
747 //Get the new lambda for current block
748 set_tuned_blk_lambda(context_ptr->md_context, pcs_ptr);
749 }
750 {
751 //**********************************
752 // Luma
753 //**********************************
754 if (component_mask == PICTURE_BUFFER_DESC_FULL_MASK ||
755 component_mask == PICTURE_BUFFER_DESC_LUMA_MASK) {
756 if (context_ptr->md_skip_blk) {
757 count_non_zero_coeffs[0] = 0;
758 eob[0] = 0;
759 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[0][context_ptr->txb_itr] = 0;
760 }
761 else {
762 svt_residual_kernel16bit(
763 ((uint16_t *)input_samples16bit->buffer_y) + input_luma_offset,
764 input_samples16bit->stride_y,
765 ((uint16_t *)pred_samples16bit->buffer_y) + pred_luma_offset,
766 pred_samples16bit->stride_y,
767 ((int16_t *)residual16bit->buffer_y) + scratch_luma_offset,
768 residual16bit->stride_y,
769 context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
770 context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr]);
771 av1_estimate_transform(
772 ((int16_t *)residual16bit->buffer_y) + scratch_luma_offset,
773 residual16bit->stride_y,
774 ((TranLow *)transform16bit->buffer_y) + coeff1d_offset,
775 NOT_USED_VALUE,
776 context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
777 &context_ptr->three_quad_energy,
778 bit_depth,
779 txb_ptr->transform_type[PLANE_TYPE_Y],
780 PLANE_TYPE_Y,
781 context_ptr->md_context->pf_ctrls.pf_shape);
782
783 int32_t seg_qp =
784 pcs_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled
785 ? pcs_ptr->parent_pcs_ptr->frm_hdr.segmentation_params
786 .feature_data[context_ptr->blk_ptr->segment_id][SEG_LVL_ALT_Q]
787 : 0;
788 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[0][context_ptr->txb_itr] = av1_quantize_inv_quantize(
789 sb_ptr->pcs_ptr,
790 context_ptr->md_context,
791 ((int32_t *)transform16bit->buffer_y) + coeff1d_offset,
792 NOT_USED_VALUE,
793 ((int32_t *)coeff_samples_sb->buffer_y) + coeff1d_offset,
794 ((int32_t *)inverse_quant_buffer->buffer_y) + coeff1d_offset,
795 qindex,
796 seg_qp,
797 context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
798 context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr],
799 context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
800 &eob[0],
801 &(count_non_zero_coeffs[0]),
802 COMPONENT_LUMA,
803 bit_depth,
804 txb_ptr->transform_type[PLANE_TYPE_Y],
805 &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
806 context_ptr->md_context->luma_txb_skip_context,
807 context_ptr->md_context->luma_dc_sign_context,
808 blk_ptr->pred_mode,
809 blk_ptr->use_intrabc,
810 context_ptr->md_context->full_lambda_md[(bit_depth == EB_10BIT) ? EB_10_BIT_MD : EB_8_BIT_MD],
811 EB_TRUE);
812 }
813 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].y_has_coeff[context_ptr->txb_itr] = count_non_zero_coeffs[0] ? EB_TRUE : EB_FALSE;
814
815 if (count_non_zero_coeffs[0] == 0) {
816 // INTER. Chroma follows Luma in transform type
817 if (blk_ptr->prediction_mode_flag == INTER_MODE) {
818 txb_ptr->transform_type[PLANE_TYPE_Y] = DCT_DCT;
819 txb_ptr->transform_type[PLANE_TYPE_UV] = DCT_DCT;
820 } else { // INTRA
821 txb_ptr->transform_type[PLANE_TYPE_Y] = DCT_DCT;
822 }
823 }
824
825 txb_ptr->nz_coef_count[0] = (uint16_t)count_non_zero_coeffs[0];
826 }
827 if (component_mask == PICTURE_BUFFER_DESC_FULL_MASK ||
828 component_mask == PICTURE_BUFFER_DESC_CHROMA_MASK) {
829
830 if (blk_ptr->prediction_mode_flag == INTRA_MODE &&
831 blk_ptr->prediction_unit_array->intra_chroma_mode == UV_CFL_PRED) {
832 EbPictureBufferDesc *recon_samples = pred_samples16bit;
833
834 uint32_t recon_luma_offset =
835 (recon_samples->origin_y + round_origin_y) * recon_samples->stride_y +
836 (recon_samples->origin_x + round_origin_x);
837
838 // Down sample Luma
839 svt_cfl_luma_subsampling_420_hbd(
840 ((uint16_t *)recon_samples->buffer_y) + recon_luma_offset,
841 recon_samples->stride_y,
842 context_ptr->md_context->pred_buf_q3,
843 context_ptr->blk_geom->bwidth_uv == context_ptr->blk_geom->bwidth
844 ? (context_ptr->blk_geom->bwidth_uv << 1)
845 : context_ptr->blk_geom->bwidth,
846 context_ptr->blk_geom->bheight_uv == context_ptr->blk_geom->bheight
847 ? (context_ptr->blk_geom->bheight_uv << 1)
848 : context_ptr->blk_geom->bheight);
849 int32_t round_offset =
850 ((context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr]) *
851 (context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr])) /
852 2;
853
854 svt_subtract_average(
855 context_ptr->md_context->pred_buf_q3,
856 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
857 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
858 round_offset,
859 svt_log2f(context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr]) +
860 svt_log2f(context_ptr->blk_geom
861 ->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]));
862
863 int32_t alpha_q3 = cfl_idx_to_alpha(blk_ptr->prediction_unit_array->cfl_alpha_idx,
864 blk_ptr->prediction_unit_array->cfl_alpha_signs,
865 CFL_PRED_U); // once for U, once for V
866 // TOCHANGE
867 // assert(chroma_size * CFL_BUF_LINE + chroma_size <= CFL_BUF_SQUARE);
868
869 svt_cfl_predict_hbd(
870 context_ptr->md_context->pred_buf_q3,
871 ((uint16_t *)pred_samples16bit->buffer_cb) + pred_cb_offset,
872 pred_samples16bit->stride_cb,
873 ((uint16_t *)pred_samples16bit->buffer_cb) + pred_cb_offset,
874 pred_samples16bit->stride_cb,
875 alpha_q3,
876 context_ptr->bit_depth,
877 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
878 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]);
879
880 alpha_q3 = cfl_idx_to_alpha(blk_ptr->prediction_unit_array->cfl_alpha_idx,
881 blk_ptr->prediction_unit_array->cfl_alpha_signs,
882 CFL_PRED_V); // once for U, once for V
883 // TOCHANGE
884 //assert(chroma_size * CFL_BUF_LINE + chroma_size <= CFL_BUF_SQUARE);
885
886 svt_cfl_predict_hbd(
887 context_ptr->md_context->pred_buf_q3,
888 ((uint16_t *)pred_samples16bit->buffer_cr) + pred_cr_offset,
889 pred_samples16bit->stride_cr,
890 ((uint16_t *)pred_samples16bit->buffer_cr) + pred_cr_offset,
891 pred_samples16bit->stride_cr,
892 alpha_q3,
893 context_ptr->bit_depth,
894 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
895 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]);
896 }
897
898 //**********************************
899 // Chroma
900 //**********************************
901 if (context_ptr->md_skip_blk) {
902 count_non_zero_coeffs[1] = 0;
903 eob[1] = 0;
904 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[1][context_ptr->txb_itr] = 0;
905
906 count_non_zero_coeffs[2] = 0;
907 eob[2] = 0;
908 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[2][context_ptr->txb_itr] = 0;
909 }
910 else {
911 int32_t seg_qp =
912 pcs_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled
913 ? pcs_ptr->parent_pcs_ptr->frm_hdr.segmentation_params
914 .feature_data[context_ptr->blk_ptr->segment_id][SEG_LVL_ALT_Q]
915 : 0;
916 //**********************************
917 // Cb
918 //**********************************
919 svt_residual_kernel16bit(
920 ((uint16_t *)input_samples16bit->buffer_cb) + input_cb_offset,
921 input_samples16bit->stride_cb,
922 ((uint16_t *)pred_samples16bit->buffer_cb) + pred_cb_offset,
923 pred_samples16bit->stride_cb,
924 ((int16_t *)residual16bit->buffer_cb) + scratch_cb_offset,
925 residual16bit->stride_cb,
926 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
927 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]);
928 av1_estimate_transform(
929 ((int16_t *)residual16bit->buffer_cb) + scratch_cb_offset,
930 residual16bit->stride_cb,
931 ((TranLow *)transform16bit->buffer_cb) + context_ptr->coded_area_sb_uv,
932 NOT_USED_VALUE,
933 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
934 &context_ptr->three_quad_energy,
935 bit_depth,
936 txb_ptr->transform_type[PLANE_TYPE_UV],
937 PLANE_TYPE_UV,
938 context_ptr->md_context->pf_ctrls.pf_shape);
939 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[1][context_ptr->txb_itr] = av1_quantize_inv_quantize(
940 sb_ptr->pcs_ptr,
941 context_ptr->md_context,
942 ((int32_t *)transform16bit->buffer_cb) + context_ptr->coded_area_sb_uv,
943 NOT_USED_VALUE,
944 ((int32_t *)coeff_samples_sb->buffer_cb) + context_ptr->coded_area_sb_uv,
945 ((int32_t *)inverse_quant_buffer->buffer_cb) + context_ptr->coded_area_sb_uv,
946 qindex,
947 seg_qp,
948 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
949 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
950 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
951 &eob[1],
952 &(count_non_zero_coeffs[1]),
953 COMPONENT_CHROMA_CB,
954 bit_depth,
955 txb_ptr->transform_type[PLANE_TYPE_UV],
956 &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
957 context_ptr->md_context->cb_txb_skip_context,
958 context_ptr->md_context->cb_dc_sign_context,
959 blk_ptr->pred_mode,
960 blk_ptr->use_intrabc,
961 context_ptr->md_context->full_lambda_md[(bit_depth == EB_10BIT) ? EB_10_BIT_MD : EB_8_BIT_MD],
962 EB_TRUE);
963
964 //**********************************
965 // Cr
966 //**********************************
967 svt_residual_kernel16bit(
968 ((uint16_t *)input_samples16bit->buffer_cr) + input_cr_offset,
969 input_samples16bit->stride_cr,
970 ((uint16_t *)pred_samples16bit->buffer_cr) + pred_cr_offset,
971 pred_samples16bit->stride_cr,
972 ((int16_t *)residual16bit->buffer_cr) + scratch_cr_offset,
973 residual16bit->stride_cr,
974 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
975 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]);
976 av1_estimate_transform(
977 ((int16_t *)residual16bit->buffer_cr) + scratch_cb_offset,
978 residual16bit->stride_cr,
979 ((TranLow *)transform16bit->buffer_cr) + context_ptr->coded_area_sb_uv,
980 NOT_USED_VALUE,
981 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
982 &context_ptr->three_quad_energy,
983 bit_depth,
984 txb_ptr->transform_type[PLANE_TYPE_UV],
985 PLANE_TYPE_UV,
986 context_ptr->md_context->pf_ctrls.pf_shape);
987 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[2][context_ptr->txb_itr] = av1_quantize_inv_quantize(
988 sb_ptr->pcs_ptr,
989 context_ptr->md_context,
990 ((int32_t *)transform16bit->buffer_cr) + context_ptr->coded_area_sb_uv,
991 NOT_USED_VALUE,
992 ((int32_t *)coeff_samples_sb->buffer_cr) + context_ptr->coded_area_sb_uv,
993 ((int32_t *)inverse_quant_buffer->buffer_cr) + context_ptr->coded_area_sb_uv,
994 qindex,
995 seg_qp,
996 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
997 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
998 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
999 &eob[2],
1000 &(count_non_zero_coeffs[2]),
1001 COMPONENT_CHROMA_CR,
1002 bit_depth,
1003 txb_ptr->transform_type[PLANE_TYPE_UV],
1004 &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
1005 context_ptr->md_context->cr_txb_skip_context,
1006 context_ptr->md_context->cr_dc_sign_context,
1007 blk_ptr->pred_mode,
1008 blk_ptr->use_intrabc,
1009 context_ptr->md_context->full_lambda_md[(bit_depth == EB_10BIT) ? EB_10_BIT_MD : EB_8_BIT_MD],
1010 EB_TRUE);
1011 }
1012 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].u_has_coeff[context_ptr->txb_itr] = count_non_zero_coeffs[1] ? EB_TRUE : EB_FALSE;
1013 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].v_has_coeff[context_ptr->txb_itr] = count_non_zero_coeffs[2] ? EB_TRUE : EB_FALSE;
1014
1015 txb_ptr->nz_coef_count[1] = (uint16_t)count_non_zero_coeffs[1];
1016 txb_ptr->nz_coef_count[2] = (uint16_t)count_non_zero_coeffs[2];
1017 }
1018 }
1019
1020 return;
1021 }
1022
1023 /**********************************************************
1024 * Encode Generate Recon
1025 *
1026 * Summary: Performs an AV1 conformant
1027 * Inverse Transform and generate
1028 * the reconstructed samples of a TU.
1029 *
1030 * Inputs:
1031 * origin_x
1032 * origin_y
1033 * txb_size
1034 * sb_sz
1035 * input - Inverse Qunatized Coeff (position sensitive)
1036 * pred - prediction samples (position independent)
1037 *
1038 * Outputs:
1039 * Recon (position independent)
1040 *
1041 **********************************************************/
av1_encode_generate_recon(EncDecContext * context_ptr,uint32_t origin_x,uint32_t origin_y,EbPictureBufferDesc * pred_samples,EbPictureBufferDesc * residual16bit,uint32_t component_mask,uint16_t * eob)1042 static void av1_encode_generate_recon(EncDecContext *context_ptr, uint32_t origin_x,
1043 uint32_t origin_y,
1044 EbPictureBufferDesc *pred_samples, // no basis/offset
1045 EbPictureBufferDesc *residual16bit, // no basis/offset
1046 uint32_t component_mask, uint16_t *eob)
1047 {
1048 BlkStruct * blk_ptr = context_ptr->blk_ptr;
1049 TransformUnit *txb_ptr = &blk_ptr->txb_array[context_ptr->txb_itr];
1050
1051 // *Note - The prediction is built in-place in the Recon buffer. It is overwritten with Reconstructed
1052 // samples if the CBF==1 && SKIP==False
1053
1054 //**********************************
1055 // Luma
1056 //**********************************
1057 if (component_mask & PICTURE_BUFFER_DESC_LUMA_MASK) {
1058 {
1059 uint32_t pred_luma_offset = (pred_samples->origin_y + origin_y) *
1060 pred_samples->stride_y +
1061 (pred_samples->origin_x + origin_x);
1062 if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
1063 .y_has_coeff[context_ptr->txb_itr] == EB_TRUE &&
1064 blk_ptr->skip_flag == EB_FALSE) {
1065 uint8_t *pred_buffer = pred_samples->buffer_y + pred_luma_offset;
1066 av1_inv_transform_recon8bit(
1067 ((int32_t *)residual16bit->buffer_y) + context_ptr->coded_area_sb,
1068 pred_buffer,
1069 pred_samples->stride_y,
1070 pred_buffer,
1071 pred_samples->stride_y,
1072 context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
1073 txb_ptr->transform_type[PLANE_TYPE_Y],
1074 PLANE_TYPE_Y,
1075 eob[0],
1076 0 /*lossless*/
1077 );
1078 }
1079 }
1080 }
1081
1082 if (component_mask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
1083 //**********************************
1084 // Chroma
1085 //**********************************
1086
1087 uint32_t round_origin_x = (origin_x >> 3) << 3; // for Chroma blocks with size of 4
1088 uint32_t round_origin_y = (origin_y >> 3) << 3; // for Chroma blocks with size of 4
1089 uint32_t pred_chroma_offset = (((pred_samples->origin_y + round_origin_y) >> 1) *
1090 pred_samples->stride_cb) +
1091 ((pred_samples->origin_x + round_origin_x) >> 1);
1092
1093 //**********************************
1094 // Cb
1095 //**********************************
1096 if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
1097 .u_has_coeff[context_ptr->txb_itr] == EB_TRUE &&
1098 blk_ptr->skip_flag == EB_FALSE) {
1099 uint8_t *pred_buffer = pred_samples->buffer_cb + pred_chroma_offset;
1100
1101 av1_inv_transform_recon8bit(
1102 ((int32_t *)residual16bit->buffer_cb) + context_ptr->coded_area_sb_uv,
1103 pred_buffer,
1104 pred_samples->stride_cb,
1105 pred_buffer,
1106 pred_samples->stride_cb,
1107 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1108 txb_ptr->transform_type[PLANE_TYPE_UV],
1109 PLANE_TYPE_UV,
1110 eob[1],
1111 0 /*lossless*/);
1112 }
1113
1114 //**********************************
1115 // Cr
1116 //**********************************
1117 pred_chroma_offset =
1118 (((pred_samples->origin_y + round_origin_y) >> 1) * pred_samples->stride_cr) +
1119 ((pred_samples->origin_x + round_origin_x) >> 1);
1120
1121 if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
1122 .v_has_coeff[context_ptr->txb_itr] == EB_TRUE &&
1123 blk_ptr->skip_flag == EB_FALSE) {
1124 uint8_t *pred_buffer = pred_samples->buffer_cr + pred_chroma_offset;
1125
1126 av1_inv_transform_recon8bit(
1127 ((int32_t *)residual16bit->buffer_cr) + context_ptr->coded_area_sb_uv,
1128 pred_buffer,
1129 pred_samples->stride_cr,
1130 pred_buffer,
1131 pred_samples->stride_cr,
1132 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1133 txb_ptr->transform_type[PLANE_TYPE_UV],
1134 PLANE_TYPE_UV,
1135 eob[2],
1136 0 /*lossless*/);
1137 }
1138 }
1139
1140 return;
1141 }
1142
1143 /**********************************************************
1144 * Encode Generate Recon
1145 *
1146 * Summary: Performs an AV1 conformant
1147 * Inverse Transform and generate
1148 * the reconstructed samples of a TU.
1149 *
1150 * Inputs:
1151 * origin_x
1152 * origin_y
1153 * txb_size
1154 * sb_sz
1155 * input - Inverse Qunatized Coeff (position sensitive)
1156 * pred - prediction samples (position independent)
1157 *
1158 * Outputs:
1159 * Recon (position independent)
1160 *
1161 **********************************************************/
av1_encode_generate_recon_16bit(EncDecContext * context_ptr,uint32_t origin_x,uint32_t origin_y,EbPictureBufferDesc * pred_samples,EbPictureBufferDesc * residual16bit,uint32_t component_mask,uint16_t * eob)1162 static void av1_encode_generate_recon_16bit(EncDecContext *context_ptr, uint32_t origin_x,
1163 uint32_t origin_y,
1164 EbPictureBufferDesc *pred_samples, // no basis/offset
1165 EbPictureBufferDesc *residual16bit, // no basis/offset
1166 uint32_t component_mask, uint16_t *eob) {
1167 BlkStruct * blk_ptr = context_ptr->blk_ptr;
1168 TransformUnit *txb_ptr = &blk_ptr->txb_array[context_ptr->txb_itr];
1169
1170 //**********************************
1171 // Luma
1172 //**********************************
1173 if (component_mask & PICTURE_BUFFER_DESC_LUMA_MASK) {
1174 {
1175 uint32_t pred_luma_offset = (pred_samples->origin_y + origin_y) *
1176 pred_samples->stride_y +
1177 (pred_samples->origin_x + origin_x);
1178 if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].y_has_coeff[context_ptr->txb_itr] == EB_TRUE && blk_ptr->skip_flag == EB_FALSE) {
1179
1180 uint16_t *pred_buffer = ((uint16_t *)pred_samples->buffer_y) + pred_luma_offset;
1181 av1_inv_transform_recon(
1182 ((int32_t *)residual16bit->buffer_y) + context_ptr->coded_area_sb,
1183 CONVERT_TO_BYTEPTR(pred_buffer),
1184 pred_samples->stride_y,
1185 CONVERT_TO_BYTEPTR(pred_buffer),
1186 pred_samples->stride_y,
1187 context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
1188 context_ptr->bit_depth,
1189 txb_ptr->transform_type[PLANE_TYPE_Y],
1190 PLANE_TYPE_Y,
1191 eob[0],
1192 0 /*lossless*/
1193 );
1194 }
1195 }
1196 }
1197
1198 if (component_mask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
1199 //**********************************
1200 // Chroma
1201 //**********************************
1202
1203 //**********************************
1204 // Cb
1205 //**********************************
1206
1207 uint32_t round_origin_x = (origin_x >> 3) << 3; // for Chroma blocks with size of 4
1208 uint32_t round_origin_y = (origin_y >> 3) << 3; // for Chroma blocks with size of 4
1209
1210 uint32_t pred_chroma_offset = (((pred_samples->origin_y + round_origin_y) >> 1) *
1211 pred_samples->stride_cb) +
1212 ((pred_samples->origin_x + round_origin_x) >> 1);
1213
1214 if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].u_has_coeff[context_ptr->txb_itr] == EB_TRUE && blk_ptr->skip_flag == EB_FALSE) {
1215
1216 uint16_t *pred_buffer = ((uint16_t *)pred_samples->buffer_cb) + pred_chroma_offset;
1217 av1_inv_transform_recon(
1218 ((int32_t *)residual16bit->buffer_cb) + context_ptr->coded_area_sb_uv,
1219 CONVERT_TO_BYTEPTR(pred_buffer),
1220 pred_samples->stride_cb,
1221 CONVERT_TO_BYTEPTR(pred_buffer),
1222 pred_samples->stride_cb,
1223 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1224 context_ptr->bit_depth,
1225 txb_ptr->transform_type[PLANE_TYPE_UV],
1226 PLANE_TYPE_UV,
1227 eob[1],
1228 0 /*lossless*/);
1229 }
1230
1231 //**********************************
1232 // Cr
1233 //**********************************
1234 pred_chroma_offset =
1235 (((pred_samples->origin_y + round_origin_y) >> 1) * pred_samples->stride_cr) +
1236 ((pred_samples->origin_x + round_origin_x) >> 1);
1237 if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].v_has_coeff[context_ptr->txb_itr] == EB_TRUE && blk_ptr->skip_flag == EB_FALSE) {
1238
1239 uint16_t *pred_buffer = ((uint16_t *)pred_samples->buffer_cr) + pred_chroma_offset;
1240 av1_inv_transform_recon(
1241 ((int32_t *)residual16bit->buffer_cr) + context_ptr->coded_area_sb_uv,
1242 CONVERT_TO_BYTEPTR(pred_buffer),
1243 pred_samples->stride_cr,
1244 CONVERT_TO_BYTEPTR(pred_buffer),
1245 pred_samples->stride_cr,
1246 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1247 context_ptr->bit_depth,
1248 txb_ptr->transform_type[PLANE_TYPE_UV],
1249 PLANE_TYPE_UV,
1250 eob[2],
1251 0 /*lossless*/);
1252 }
1253 }
1254
1255 return;
1256 }
1257 static EbAv1EncodeLoopFuncPtr av1_encode_loop_func_table[2] = {av1_encode_loop,
1258 av1_encode_loop_16bit};
1259
1260 EbAv1GenerateReconFuncPtr av1_enc_gen_recon_func_ptr[2] = {av1_encode_generate_recon,
1261 av1_encode_generate_recon_16bit};
1262
/*
 * Copies `rows` rows of `cols` 16-bit samples from src to dst.
 * Both strides are expressed in 16-bit samples, not bytes.
 */
static void store16bit_copy_plane(uint16_t *dst, uint32_t dst_stride, const uint16_t *src,
                                  uint32_t src_stride, uint32_t cols, uint32_t rows) {
    for (uint32_t row = 0; row < rows; row++) {
        svt_memcpy(dst + row * dst_stride, src + row * src_stride, cols * sizeof(uint16_t));
    }
}

/*
 * Stores a block of 16-bit input samples into pcs_ptr->input_frame16bit.
 *
 * The source block is read from the start of each plane of
 * input_sample16bit_buffer. The destination is the position (sb_x, sb_y)
 * inside input_frame16bit, offset by that picture's origin_x / origin_y.
 *
 * input_sample16bit_buffer - source planes, read as uint16_t samples
 * pcs_ptr                  - owns the destination picture (input_frame16bit)
 * sb_x, sb_y               - luma position of the block in the destination
 * sb_w, sb_h               - luma width and height of the block, in samples
 *
 * The chroma planes are copied with position, size and picture origin all
 * halved, i.e. they are treated as half the luma width and height.
 */
void store16bit_input_src(EbPictureBufferDesc *input_sample16bit_buffer, PictureControlSet *pcs_ptr,
                          uint32_t sb_x, uint32_t sb_y, uint32_t sb_w, uint32_t sb_h) {
    EbPictureBufferDesc *src_pic = input_sample16bit_buffer;
    EbPictureBufferDesc *dst_pic = pcs_ptr->input_frame16bit;

    // Luma
    store16bit_copy_plane((uint16_t *)dst_pic->buffer_y + (sb_x + dst_pic->origin_x) +
                              (sb_y + dst_pic->origin_y) * dst_pic->stride_y,
                          dst_pic->stride_y,
                          (const uint16_t *)src_pic->buffer_y,
                          src_pic->stride_y,
                          sb_w,
                          sb_h);

    // Chroma geometry: everything is halved relative to luma.
    const uint32_t uv_x = sb_x / 2;
    const uint32_t uv_y = sb_y / 2;
    const uint32_t uv_w = sb_w / 2;
    const uint32_t uv_h = sb_h / 2;

    // Cb
    store16bit_copy_plane((uint16_t *)dst_pic->buffer_cb + (uv_x + dst_pic->origin_x / 2) +
                              (uv_y + dst_pic->origin_y / 2) * dst_pic->stride_cb,
                          dst_pic->stride_cb,
                          (const uint16_t *)src_pic->buffer_cb,
                          src_pic->stride_cb,
                          uv_w,
                          uv_h);

    // Cr: the base offset must use the Cr stride, the same one the row copies
    // advance by (the previous code mixed in stride_cb here).
    store16bit_copy_plane((uint16_t *)dst_pic->buffer_cr + (uv_x + dst_pic->origin_x / 2) +
                              (uv_y + dst_pic->origin_y / 2) * dst_pic->stride_cr,
                          dst_pic->stride_cr,
                          (const uint16_t *)src_pic->buffer_cr,
                          src_pic->stride_cr,
                          uv_w,
                          uv_h);
}
1306
1307 void update_mi_map_skip_settings(BlkStruct *blk_ptr);
1308 void move_blk_data(PictureControlSet *pcs, EncDecContext *context_ptr, BlkStruct *src_cu,
1309 BlkStruct *dst_cu);
1310
perform_intra_coding_loop(PictureControlSet * pcs_ptr,SuperBlock * sb_ptr,uint32_t sb_addr,BlkStruct * blk_ptr,PredictionUnit * pu_ptr,EncDecContext * context_ptr)1311 void perform_intra_coding_loop(PictureControlSet *pcs_ptr, SuperBlock *sb_ptr, uint32_t sb_addr,
1312 BlkStruct *blk_ptr, PredictionUnit *pu_ptr,
1313 EncDecContext *context_ptr) {
1314 EbBool is_16bit = context_ptr->is_16bit;
1315 uint32_t bit_depth = context_ptr->bit_depth;
1316 uint8_t is_inter = 0; // set to 0 b/c this is the intra path
1317 EbPictureBufferDesc *recon_buffer;
1318 EbPictureBufferDesc *coeff_buffer_sb = pcs_ptr->parent_pcs_ptr->enc_dec_ptr->quantized_coeff[sb_addr];
1319 uint16_t tile_idx = context_ptr->tile_index;
1320 NeighborArrayUnit *ep_luma_recon_neighbor_array =
1321 is_16bit ? pcs_ptr->ep_luma_recon_neighbor_array16bit[tile_idx]
1322 : pcs_ptr->ep_luma_recon_neighbor_array[tile_idx];
1323 NeighborArrayUnit *ep_cb_recon_neighbor_array =
1324 is_16bit ? pcs_ptr->ep_cb_recon_neighbor_array16bit[tile_idx]
1325 : pcs_ptr->ep_cb_recon_neighbor_array[tile_idx];
1326 NeighborArrayUnit *ep_cr_recon_neighbor_array =
1327 is_16bit ? pcs_ptr->ep_cr_recon_neighbor_array16bit[tile_idx]
1328 : pcs_ptr->ep_cr_recon_neighbor_array[tile_idx];
1329
1330 EbPictureBufferDesc *residual_buffer = context_ptr->residual_buffer;
1331 EbPictureBufferDesc *transform_buffer = context_ptr->transform_buffer;
1332 EbPictureBufferDesc *inverse_quant_buffer = context_ptr->inverse_quant_buffer;
1333
1334 uint32_t count_non_zero_coeffs[3];
1335 uint16_t eobs[MAX_TXB_COUNT][3];
1336 uint64_t y_txb_coeff_bits;
1337 uint64_t cb_txb_coeff_bits;
1338 uint64_t cr_txb_coeff_bits;
1339
1340 if (pcs_ptr->parent_pcs_ptr->is_used_as_reference_flag == EB_TRUE)
1341 //get the 16bit form of the input SB
1342 if (is_16bit)
1343 recon_buffer = ((EbReferenceObject *)
1344 pcs_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)
1345 ->reference_picture16bit;
1346 else
1347 recon_buffer = ((EbReferenceObject *)
1348 pcs_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)
1349 ->reference_picture;
1350 else // non ref pictures
1351 recon_buffer = is_16bit ? pcs_ptr->parent_pcs_ptr->enc_dec_ptr->recon_picture16bit_ptr : pcs_ptr->parent_pcs_ptr->enc_dec_ptr->recon_picture_ptr;
1352 uint32_t tot_tu = context_ptr->blk_geom->txb_count[blk_ptr->tx_depth];
1353
1354 // Luma path
1355 for (context_ptr->txb_itr = 0; context_ptr->txb_itr < tot_tu; context_ptr->txb_itr++) {
1356 uint16_t txb_origin_x =
1357 context_ptr->blk_origin_x +
1358 context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1359 context_ptr->blk_geom->origin_x;
1360 uint16_t txb_origin_y =
1361 context_ptr->blk_origin_y +
1362 context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1363 context_ptr->blk_geom->origin_y;
1364 context_ptr->md_context->luma_txb_skip_context = 0;
1365 context_ptr->md_context->luma_dc_sign_context = 0;
1366 get_txb_ctx(pcs_ptr,
1367 COMPONENT_LUMA,
1368 pcs_ptr->ep_luma_dc_sign_level_coeff_neighbor_array[tile_idx],
1369 txb_origin_x,
1370 txb_origin_y,
1371 context_ptr->blk_geom->bsize,
1372 context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
1373 &context_ptr->md_context->luma_txb_skip_context,
1374 &context_ptr->md_context->luma_dc_sign_context);
1375 if (is_16bit) {
1376 uint16_t top_neigh_array[64 * 2 + 1];
1377 uint16_t left_neigh_array[64 * 2 + 1];
1378 PredictionMode mode;
1379
1380 TxSize tx_size = context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr];
1381
1382 if (txb_origin_y != 0)
1383 svt_memcpy(top_neigh_array + 1,
1384 (uint16_t *)(ep_luma_recon_neighbor_array->top_array) + txb_origin_x,
1385 context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr] *
1386 2 * sizeof(uint16_t));
1387 if (txb_origin_x != 0)
1388 svt_memcpy(left_neigh_array + 1,
1389 (uint16_t *)(ep_luma_recon_neighbor_array->left_array) + txb_origin_y,
1390 context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr] *
1391 2 * sizeof(uint16_t));
1392 if (txb_origin_y != 0 && txb_origin_x != 0)
1393 top_neigh_array[0] = left_neigh_array[0] =
1394 ((uint16_t *)(ep_luma_recon_neighbor_array->top_left_array) +
1395 MAX_PICTURE_HEIGHT_SIZE + txb_origin_x - txb_origin_y)[0];
1396
1397 mode = blk_ptr->pred_mode;
1398
1399 svt_av1_predict_intra_block_16bit(
1400 bit_depth,
1401 ED_STAGE,
1402 context_ptr->blk_geom,
1403 context_ptr->blk_ptr->av1xd,
1404 context_ptr->blk_geom->bwidth,
1405 context_ptr->blk_geom->bheight,
1406 tx_size,
1407 mode,
1408 pu_ptr->angle_delta[PLANE_TYPE_Y],
1409 blk_ptr->palette_info.pmi.palette_size[0] > 0,
1410 &blk_ptr->palette_info,
1411 blk_ptr->filter_intra_mode,
1412 top_neigh_array + 1,
1413 left_neigh_array + 1,
1414 recon_buffer,
1415 (context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1416 context_ptr->blk_geom->origin_x) >>
1417 2,
1418 (context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1419 context_ptr->blk_geom->origin_y) >>
1420 2,
1421 0,
1422 context_ptr->blk_geom->bsize,
1423 txb_origin_x,
1424 txb_origin_y,
1425 context_ptr->blk_origin_x,
1426 context_ptr->blk_origin_y,
1427 0,
1428 0,
1429 &((SequenceControlSet *)pcs_ptr->scs_wrapper_ptr->object_ptr)->seq_header);
1430 } else {
1431 uint8_t top_neigh_array[64 * 2 + 1];
1432 uint8_t left_neigh_array[64 * 2 + 1];
1433 PredictionMode mode;
1434
1435 TxSize tx_size = context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr];
1436
1437 if (txb_origin_y != 0)
1438 svt_memcpy(
1439 top_neigh_array + 1,
1440 ep_luma_recon_neighbor_array->top_array + txb_origin_x,
1441 context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr] * 2);
1442
1443 if (txb_origin_x != 0)
1444 svt_memcpy(
1445 left_neigh_array + 1,
1446 ep_luma_recon_neighbor_array->left_array + txb_origin_y,
1447 context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr] * 2);
1448
1449 if (txb_origin_y != 0 && txb_origin_x != 0)
1450 top_neigh_array[0] = left_neigh_array[0] =
1451 ep_luma_recon_neighbor_array
1452 ->top_left_array[MAX_PICTURE_HEIGHT_SIZE + txb_origin_x - txb_origin_y];
1453
1454 mode = blk_ptr->pred_mode;
1455
1456 // Hsan: if CHROMA_MODE_2, then CFL will be evaluated @ EP as no CHROMA @ MD
1457 // If that's the case then you should ensure than the 1st chroma prediction uses UV_DC_PRED (that's the default configuration for CHROMA_MODE_2 if CFL applicable (set @ fast loop candidates injection) then MD assumes chroma mode always UV_DC_PRED)
1458 svt_av1_predict_intra_block(
1459 ED_STAGE,
1460 context_ptr->blk_geom,
1461 blk_ptr->av1xd,
1462 context_ptr->blk_geom->bwidth,
1463 context_ptr->blk_geom->bheight,
1464 tx_size,
1465 mode,
1466 pu_ptr->angle_delta[PLANE_TYPE_Y],
1467 blk_ptr->palette_info.pmi.palette_size[0] > 0,
1468 &blk_ptr->palette_info,
1469 blk_ptr->filter_intra_mode,
1470 top_neigh_array + 1,
1471 left_neigh_array + 1,
1472 recon_buffer,
1473 (context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1474 context_ptr->blk_geom->origin_x) >>
1475 2,
1476 (context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1477 context_ptr->blk_geom->origin_y) >>
1478 2,
1479 0,
1480 context_ptr->blk_geom->bsize,
1481 txb_origin_x,
1482 txb_origin_y,
1483 context_ptr->blk_origin_x,
1484 context_ptr->blk_origin_y,
1485 0,
1486 0,
1487 &((SequenceControlSet *)pcs_ptr->scs_wrapper_ptr->object_ptr)->seq_header );
1488 }
1489 // Encode Transform Unit -INTRA-
1490 av1_encode_loop_func_table[is_16bit](pcs_ptr,
1491 context_ptr,
1492 sb_ptr,
1493 txb_origin_x,
1494 txb_origin_y,
1495 recon_buffer,
1496 coeff_buffer_sb,
1497 residual_buffer,
1498 transform_buffer,
1499 inverse_quant_buffer,
1500 count_non_zero_coeffs,
1501 PICTURE_BUFFER_DESC_LUMA_MASK,
1502 eobs[context_ptr->txb_itr]);
1503
1504 if (pcs_ptr->cdf_ctrl.update_coef) {
1505 ModeDecisionCandidateBuffer **candidate_buffer_ptr_array_base =
1506 context_ptr->md_context->candidate_buffer_ptr_array;
1507 ModeDecisionCandidateBuffer **candidate_buffer_ptr_array =
1508 &(candidate_buffer_ptr_array_base[0]);
1509 ModeDecisionCandidateBuffer *candidate_buffer;
1510
1511 // Set the Candidate Buffer
1512 candidate_buffer = candidate_buffer_ptr_array[0];
1513 // Rate estimation function uses the values from CandidatePtr. The right values are copied from blk_ptr to CandidatePtr
1514 candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr] =
1515 blk_ptr->txb_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_Y];
1516 candidate_buffer->candidate_ptr->transform_type_uv =
1517 blk_ptr->txb_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_UV];
1518 candidate_buffer->candidate_ptr->type = blk_ptr->prediction_mode_flag;
1519 candidate_buffer->candidate_ptr->pred_mode = blk_ptr->pred_mode;
1520 candidate_buffer->candidate_ptr->filter_intra_mode = blk_ptr->filter_intra_mode;
1521 const uint32_t coeff1d_offset = context_ptr->coded_area_sb;
1522
1523 av1_txb_estimate_coeff_bits(
1524 context_ptr->md_context,
1525 1, //allow_update_cdf,
1526 &pcs_ptr->ec_ctx_array[sb_addr],
1527 pcs_ptr,
1528 candidate_buffer,
1529 coeff1d_offset,
1530 context_ptr->coded_area_sb_uv,
1531 coeff_buffer_sb,
1532 eobs[context_ptr->txb_itr][0],
1533 eobs[context_ptr->txb_itr][1],
1534 eobs[context_ptr->txb_itr][2],
1535 &y_txb_coeff_bits,
1536 &cb_txb_coeff_bits,
1537 &cr_txb_coeff_bits,
1538 context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
1539 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1540 candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr],
1541 candidate_buffer->candidate_ptr->transform_type_uv,
1542 COMPONENT_LUMA);
1543 }
1544
1545 av1_enc_gen_recon_func_ptr[is_16bit](context_ptr,
1546 txb_origin_x,
1547 txb_origin_y,
1548 recon_buffer,
1549 inverse_quant_buffer,
1550 PICTURE_BUFFER_DESC_LUMA_MASK,
1551 eobs[context_ptr->txb_itr]);
1552
1553 // Update Recon Samples-INTRA-
1554 encode_pass_update_recon_sample_neighbour_arrays(
1555 ep_luma_recon_neighbor_array,
1556 ep_cb_recon_neighbor_array,
1557 ep_cr_recon_neighbor_array,
1558 recon_buffer,
1559 txb_origin_x,
1560 txb_origin_y,
1561 context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
1562 context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr],
1563 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1564 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1565 PICTURE_BUFFER_DESC_LUMA_MASK,
1566 is_16bit);
1567
1568 context_ptr->coded_area_sb +=
1569 context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr] *
1570 context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr];
1571
1572 // Update the luma Dc Sign Level Coeff Neighbor Array
1573 {
1574 uint8_t dc_sign_level_coeff = (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[0][context_ptr->txb_itr];
1575 neighbor_array_unit_mode_write(
1576 pcs_ptr->ep_luma_dc_sign_level_coeff_neighbor_array[tile_idx],
1577 (uint8_t *)&dc_sign_level_coeff,
1578 txb_origin_x,
1579 txb_origin_y,
1580 context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
1581 context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr],
1582 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
1583 }
1584
1585 } // Transform Loop
1586
1587 // Chroma path
1588
1589 if (context_ptr->blk_geom->has_uv) {
1590 context_ptr->txb_itr = 0;
1591 uint16_t txb_origin_x =
1592 context_ptr->blk_origin_x +
1593 context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1594 context_ptr->blk_geom->origin_x;
1595 uint16_t txb_origin_y =
1596 context_ptr->blk_origin_y +
1597 context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1598 context_ptr->blk_geom->origin_y;
1599 uint32_t blk_originx_uv = (context_ptr->blk_origin_x >> 3 << 3) >> 1;
1600 uint32_t blk_originy_uv = (context_ptr->blk_origin_y >> 3 << 3) >> 1;
1601
1602 context_ptr->md_context->cb_txb_skip_context = 0;
1603 context_ptr->md_context->cb_dc_sign_context = 0;
1604 get_txb_ctx(pcs_ptr,
1605 COMPONENT_CHROMA,
1606 pcs_ptr->ep_cb_dc_sign_level_coeff_neighbor_array[tile_idx],
1607 blk_originx_uv,
1608 blk_originy_uv,
1609 context_ptr->blk_geom->bsize_uv,
1610 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1611 &context_ptr->md_context->cb_txb_skip_context,
1612 &context_ptr->md_context->cb_dc_sign_context);
1613
1614 context_ptr->md_context->cr_txb_skip_context = 0;
1615 context_ptr->md_context->cr_dc_sign_context = 0;
1616 get_txb_ctx(pcs_ptr,
1617 COMPONENT_CHROMA,
1618 pcs_ptr->ep_cr_dc_sign_level_coeff_neighbor_array[tile_idx],
1619 blk_originx_uv,
1620 blk_originy_uv,
1621 context_ptr->blk_geom->bsize_uv,
1622 context_ptr->blk_geom->txsize_uv[context_ptr->blk_ptr->tx_depth][context_ptr->txb_itr],
1623 &context_ptr->md_context->cr_txb_skip_context,
1624 &context_ptr->md_context->cr_dc_sign_context);
1625
1626 if (is_16bit) {
1627 uint16_t top_neigh_array[64 * 2 + 1];
1628 uint16_t left_neigh_array[64 * 2 + 1];
1629 PredictionMode mode;
1630
1631 int32_t plane_end = 2;
1632
1633 for (int32_t plane = 1; plane <= plane_end; ++plane) {
1634 TxSize tx_size =
1635 plane
1636 ? context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr]
1637 : context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr];
1638
1639 if (plane == 1) {
1640 if (blk_originy_uv != 0)
1641 svt_memcpy(top_neigh_array + 1,
1642 (uint16_t *)(ep_cb_recon_neighbor_array->top_array) + blk_originx_uv,
1643 context_ptr->blk_geom->bwidth_uv * 2 * sizeof(uint16_t));
1644 if (blk_originx_uv != 0)
1645 svt_memcpy(left_neigh_array + 1,
1646 (uint16_t *)(ep_cb_recon_neighbor_array->left_array) + blk_originy_uv,
1647 context_ptr->blk_geom->bheight_uv * 2 * sizeof(uint16_t));
1648 if (blk_originy_uv != 0 && blk_originx_uv != 0)
1649 top_neigh_array[0] = left_neigh_array[0] =
1650 ((uint16_t *)(ep_cb_recon_neighbor_array->top_left_array) +
1651 MAX_PICTURE_HEIGHT_SIZE / 2 + blk_originx_uv - blk_originy_uv)[0];
1652 } else if (plane == 2) {
1653 if (blk_originy_uv != 0)
1654 svt_memcpy(top_neigh_array + 1,
1655 (uint16_t *)(ep_cr_recon_neighbor_array->top_array) + blk_originx_uv,
1656 context_ptr->blk_geom->bwidth_uv * 2 * sizeof(uint16_t));
1657 if (blk_originx_uv != 0)
1658 svt_memcpy(left_neigh_array + 1,
1659 (uint16_t *)(ep_cr_recon_neighbor_array->left_array) + blk_originy_uv,
1660 context_ptr->blk_geom->bheight_uv * 2 * sizeof(uint16_t));
1661 if (blk_originy_uv != 0 && blk_originx_uv != 0)
1662 top_neigh_array[0] = left_neigh_array[0] =
1663 ((uint16_t *)(ep_cr_recon_neighbor_array->top_left_array) +
1664 MAX_PICTURE_HEIGHT_SIZE / 2 + blk_originx_uv - blk_originy_uv)[0];
1665 }
1666
1667 mode = (pu_ptr->intra_chroma_mode == UV_CFL_PRED)
1668 ? (PredictionMode)UV_DC_PRED
1669 : (PredictionMode)pu_ptr->intra_chroma_mode;
1670
1671 svt_av1_predict_intra_block_16bit(
1672 bit_depth,
1673 ED_STAGE,
1674 context_ptr->blk_geom,
1675 context_ptr->blk_ptr->av1xd,
1676 plane ? context_ptr->blk_geom->bwidth_uv : context_ptr->blk_geom->bwidth,
1677 plane ? context_ptr->blk_geom->bheight_uv : context_ptr->blk_geom->bheight,
1678 tx_size,
1679 mode,
1680 plane ? pu_ptr->angle_delta[PLANE_TYPE_UV] : pu_ptr->angle_delta[PLANE_TYPE_Y],
1681 0, //chroma
1682 &blk_ptr->palette_info,
1683 FILTER_INTRA_MODES,
1684 top_neigh_array + 1,
1685 left_neigh_array + 1,
1686 recon_buffer,
1687 plane ? 0
1688 : (context_ptr->blk_geom
1689 ->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1690 context_ptr->blk_geom->origin_x) >>
1691 2,
1692 plane ? 0
1693 : (context_ptr->blk_geom
1694 ->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1695 context_ptr->blk_geom->origin_y) >>
1696 2,
1697 plane,
1698 context_ptr->blk_geom->bsize,
1699 txb_origin_x,
1700 txb_origin_y,
1701 context_ptr->blk_origin_x,
1702 context_ptr->blk_origin_y,
1703 0,
1704 0,
1705 &((SequenceControlSet *)pcs_ptr->scs_wrapper_ptr->object_ptr)->seq_header);
1706 }
1707 } else {
1708 uint8_t top_neigh_array[64 * 2 + 1];
1709 uint8_t left_neigh_array[64 * 2 + 1];
1710 PredictionMode mode;
1711
1712 // Partition Loop
1713 int32_t plane_end = 2;
1714
1715 for (int32_t plane = 1; plane <= plane_end; ++plane) {
1716 TxSize tx_size =
1717 plane
1718 ? context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr]
1719 : context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr];
1720
1721 if (plane == 1) {
1722 if (blk_originy_uv != 0)
1723 svt_memcpy(top_neigh_array + 1,
1724 ep_cb_recon_neighbor_array->top_array + blk_originx_uv,
1725 context_ptr->blk_geom->bwidth_uv * 2);
1726
1727 if (blk_originx_uv != 0)
1728 svt_memcpy(left_neigh_array + 1,
1729 ep_cb_recon_neighbor_array->left_array + blk_originy_uv,
1730 context_ptr->blk_geom->bheight_uv * 2);
1731
1732 if (blk_originy_uv != 0 && blk_originx_uv != 0)
1733 top_neigh_array[0] = left_neigh_array[0] =
1734 ep_cb_recon_neighbor_array
1735 ->top_left_array[MAX_PICTURE_HEIGHT_SIZE / 2 + blk_originx_uv -
1736 blk_originy_uv];
1737 } else {
1738 if (blk_originy_uv != 0)
1739 svt_memcpy(top_neigh_array + 1,
1740 ep_cr_recon_neighbor_array->top_array + blk_originx_uv,
1741 context_ptr->blk_geom->bwidth_uv * 2);
1742
1743 if (blk_originx_uv != 0)
1744 svt_memcpy(left_neigh_array + 1,
1745 ep_cr_recon_neighbor_array->left_array + blk_originy_uv,
1746 context_ptr->blk_geom->bheight_uv * 2);
1747
1748 if (blk_originy_uv != 0 && blk_originx_uv != 0)
1749 top_neigh_array[0] = left_neigh_array[0] =
1750 ep_cr_recon_neighbor_array
1751 ->top_left_array[MAX_PICTURE_HEIGHT_SIZE / 2 + blk_originx_uv -
1752 blk_originy_uv];
1753 }
1754
1755 mode = (pu_ptr->intra_chroma_mode == UV_CFL_PRED)
1756 ? (PredictionMode)UV_DC_PRED
1757 : (PredictionMode)pu_ptr->intra_chroma_mode;
1758
1759 // Hsan: if CHROMA_MODE_2, then CFL will be evaluated @ EP as no CHROMA @ MD
1760 // If that's the case then you should ensure than the 1st chroma prediction uses UV_DC_PRED (that's the default configuration for CHROMA_MODE_2 if CFL applicable (set @ fast loop candidates injection) then MD assumes chroma mode always UV_DC_PRED)
1761 svt_av1_predict_intra_block(
1762 ED_STAGE,
1763 context_ptr->blk_geom,
1764 blk_ptr->av1xd,
1765 plane ? context_ptr->blk_geom->bwidth_uv : context_ptr->blk_geom->bwidth,
1766 plane ? context_ptr->blk_geom->bheight_uv : context_ptr->blk_geom->bheight,
1767 tx_size,
1768 mode,
1769 plane ? pu_ptr->angle_delta[PLANE_TYPE_UV] : pu_ptr->angle_delta[PLANE_TYPE_Y],
1770 0, //chroma
1771 &blk_ptr->palette_info,
1772 FILTER_INTRA_MODES,
1773 top_neigh_array + 1,
1774 left_neigh_array + 1,
1775 recon_buffer,
1776 plane ? 0
1777 : (context_ptr->blk_geom
1778 ->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1779 context_ptr->blk_geom->origin_x) >>
1780 2,
1781 plane ? 0
1782 : (context_ptr->blk_geom
1783 ->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1784 context_ptr->blk_geom->origin_y) >>
1785 2,
1786 plane,
1787 context_ptr->blk_geom->bsize,
1788 txb_origin_x,
1789 txb_origin_y,
1790 context_ptr->blk_origin_x,
1791 context_ptr->blk_origin_y,
1792 0,
1793 0,
1794 &((SequenceControlSet *)pcs_ptr->scs_wrapper_ptr->object_ptr)->seq_header);
1795 }
1796 }
1797
1798 // Encode Transform Unit -INTRA-
1799
1800 av1_encode_loop_func_table[is_16bit](pcs_ptr,
1801 context_ptr,
1802 sb_ptr,
1803 txb_origin_x,
1804 txb_origin_y,
1805 recon_buffer,
1806 coeff_buffer_sb,
1807 residual_buffer,
1808 transform_buffer,
1809 inverse_quant_buffer,
1810 count_non_zero_coeffs,
1811 PICTURE_BUFFER_DESC_CHROMA_MASK,
1812 eobs[context_ptr->txb_itr]);
1813
1814 if (pcs_ptr->cdf_ctrl.update_coef) {
1815 ModeDecisionCandidateBuffer **candidate_buffer_ptr_array_base =
1816 context_ptr->md_context->candidate_buffer_ptr_array;
1817 ModeDecisionCandidateBuffer **candidate_buffer_ptr_array =
1818 &(candidate_buffer_ptr_array_base[0]);
1819 ModeDecisionCandidateBuffer *candidate_buffer;
1820
1821 // Set the Candidate Buffer
1822 candidate_buffer = candidate_buffer_ptr_array[0];
1823 // Rate estimation function uses the values from CandidatePtr. The right values are copied from blk_ptr to CandidatePtr
1824 candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr] =
1825 blk_ptr->txb_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_Y];
1826 candidate_buffer->candidate_ptr->transform_type_uv =
1827 blk_ptr->txb_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_UV];
1828 candidate_buffer->candidate_ptr->type = blk_ptr->prediction_mode_flag;
1829 candidate_buffer->candidate_ptr->pred_mode = blk_ptr->pred_mode;
1830 candidate_buffer->candidate_ptr->filter_intra_mode = blk_ptr->filter_intra_mode;
1831 const uint32_t coeff1d_offset = context_ptr->coded_area_sb;
1832
1833 av1_txb_estimate_coeff_bits(
1834 context_ptr->md_context,
1835 1, //allow_update_cdf,
1836 &pcs_ptr->ec_ctx_array[sb_addr],
1837 pcs_ptr,
1838 candidate_buffer,
1839 coeff1d_offset,
1840 context_ptr->coded_area_sb_uv,
1841 coeff_buffer_sb,
1842 eobs[context_ptr->txb_itr][0],
1843 eobs[context_ptr->txb_itr][1],
1844 eobs[context_ptr->txb_itr][2],
1845 &y_txb_coeff_bits,
1846 &cb_txb_coeff_bits,
1847 &cr_txb_coeff_bits,
1848 context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
1849 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1850 candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr],
1851 candidate_buffer->candidate_ptr->transform_type_uv,
1852 COMPONENT_CHROMA);
1853 }
1854
1855 av1_enc_gen_recon_func_ptr[is_16bit](context_ptr,
1856 txb_origin_x,
1857 txb_origin_y,
1858 recon_buffer,
1859 inverse_quant_buffer,
1860 PICTURE_BUFFER_DESC_CHROMA_MASK,
1861 eobs[context_ptr->txb_itr]);
1862
1863 // Update Recon Samples-INTRA-
1864 encode_pass_update_recon_sample_neighbour_arrays(
1865 ep_luma_recon_neighbor_array,
1866 ep_cb_recon_neighbor_array,
1867 ep_cr_recon_neighbor_array,
1868 recon_buffer,
1869 txb_origin_x,
1870 txb_origin_y,
1871 context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
1872 context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr],
1873 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1874 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1875 PICTURE_BUFFER_DESC_CHROMA_MASK,
1876 is_16bit);
1877
1878 context_ptr->coded_area_sb_uv +=
1879 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr] *
1880 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr];
1881
1882 // Update the cb Dc Sign Level Coeff Neighbor Array
1883 {
1884 uint8_t dc_sign_level_coeff = (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[1][context_ptr->txb_itr];
1885 neighbor_array_unit_mode_write(
1886 pcs_ptr->ep_cb_dc_sign_level_coeff_neighbor_array[tile_idx],
1887 (uint8_t *)&dc_sign_level_coeff,
1888 ROUND_UV(txb_origin_x) >> 1,
1889 ROUND_UV(txb_origin_y) >> 1,
1890 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1891 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1892 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
1893 }
1894
1895 // Update the cr DC Sign Level Coeff Neighbor Array
1896 {
1897 uint8_t dc_sign_level_coeff = (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[2][context_ptr->txb_itr];
1898 neighbor_array_unit_mode_write(
1899 pcs_ptr->ep_cr_dc_sign_level_coeff_neighbor_array[tile_idx],
1900 (uint8_t *)&dc_sign_level_coeff,
1901 ROUND_UV(txb_origin_x) >> 1,
1902 ROUND_UV(txb_origin_y) >> 1,
1903 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1904 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1905 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
1906 }
1907
1908 } // Transform Loop
1909 for (context_ptr->txb_itr = 0; context_ptr->txb_itr < tot_tu; context_ptr->txb_itr++) {
1910 uint8_t uv_pass = blk_ptr->tx_depth && context_ptr->txb_itr ? 0 : 1;
1911
1912 if (context_ptr->blk_geom->has_uv && uv_pass) {
1913 blk_ptr->block_has_coeff = blk_ptr->block_has_coeff |
1914 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].y_has_coeff[context_ptr->txb_itr] |
1915 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].u_has_coeff[context_ptr->txb_itr] |
1916 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].v_has_coeff[context_ptr->txb_itr];
1917
1918 if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].u_has_coeff[context_ptr->txb_itr])
1919 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].u_has_coeff[0] = EB_TRUE;
1920 if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].v_has_coeff[context_ptr->txb_itr])
1921 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].v_has_coeff[0] = EB_TRUE;
1922 }
1923 else {
1924 blk_ptr->block_has_coeff =
1925 blk_ptr->block_has_coeff | context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].y_has_coeff[context_ptr->txb_itr];
1926 }
1927 } // Transform Loop
1928 }
1929 #define REFMVS_LIMIT ((1 << 12) - 1)
1930
av1_copy_frame_mvs(PictureControlSet * pcs_ptr,const Av1Common * const cm,MbModeInfo mi,int mi_row,int mi_col,int x_mis,int y_mis,EbReferenceObject * object_ptr)1931 static void av1_copy_frame_mvs(PictureControlSet *pcs_ptr, const Av1Common *const cm, MbModeInfo mi,
1932 int mi_row, int mi_col, int x_mis, int y_mis,
1933 EbReferenceObject *object_ptr) {
1934 const int frame_mvs_stride = ROUND_POWER_OF_TWO(cm->mi_cols, 1);
1935 MV_REF * frame_mvs = object_ptr->mvs + (mi_row >> 1) * frame_mvs_stride + (mi_col >> 1);
1936 x_mis = ROUND_POWER_OF_TWO(x_mis, 1);
1937 y_mis = ROUND_POWER_OF_TWO(y_mis, 1);
1938 int w, h;
1939
1940 for (h = 0; h < y_mis; h++) {
1941 MV_REF *mv = frame_mvs;
1942 for (w = 0; w < x_mis; w++) {
1943 mv->ref_frame = NONE_FRAME;
1944 mv->mv.as_int = 0;
1945
1946 for (int idx = 0; idx < 2; ++idx) {
1947 MvReferenceFrame ref_frame = mi.block_mi.ref_frame[idx];
1948 if (ref_frame > INTRA_FRAME) {
1949 int8_t ref_idx = pcs_ptr->ref_frame_side[ref_frame];
1950 if (ref_idx) continue;
1951 if ((abs(mi.block_mi.mv[idx].as_mv.row) > REFMVS_LIMIT) ||
1952 (abs(mi.block_mi.mv[idx].as_mv.col) > REFMVS_LIMIT))
1953 continue;
1954 mv->ref_frame = ref_frame;
1955 mv->mv.as_int = mi.block_mi.mv[idx].as_int;
1956 }
1957 }
1958 mv++;
1959 }
1960 frame_mvs += frame_mvs_stride;
1961 }
1962 }
1963 /*******************************************
1964 * Encode Pass
1965 *
1966 * Summary: Performs an AV1 conformant
1967 * reconstruction based on the SB
1968 * mode decision.
1969 *
1970 * Inputs:
1971 * SourcePic
1972 * Coding Results
1973 * SB Location
1974 * Sequence Control Set
1975 * Picture Control Set
1976 *
1977 * Outputs:
1978 * Reconstructed Samples
1979 * Coefficient Samples
1980 *
1981 *******************************************/
av1_encode_decode(SequenceControlSet * scs_ptr,PictureControlSet * pcs_ptr,SuperBlock * sb_ptr,uint32_t sb_addr,uint32_t sb_origin_x,uint32_t sb_origin_y,EncDecContext * context_ptr)1982 EB_EXTERN void av1_encode_decode(SequenceControlSet *scs_ptr, PictureControlSet *pcs_ptr,
1983 SuperBlock *sb_ptr, uint32_t sb_addr, uint32_t sb_origin_x,
1984 uint32_t sb_origin_y, EncDecContext *context_ptr) {
1985 EbBool is_16bit = context_ptr->is_16bit;
1986 EbPictureBufferDesc *recon_buffer;
1987 EbPictureBufferDesc *coeff_buffer_sb = pcs_ptr->parent_pcs_ptr->enc_dec_ptr->quantized_coeff[sb_addr];
1988 EbPictureBufferDesc *input_picture;
1989 ModeDecisionContext *md_context_ptr;
1990 md_context_ptr = context_ptr->md_context;
1991 input_picture = context_ptr->input_samples =
1992 (EbPictureBufferDesc *)pcs_ptr->parent_pcs_ptr->enhanced_picture_ptr;
1993 // SB Stats
1994 uint32_t sb_width =
1995 MIN(scs_ptr->sb_size_pix, pcs_ptr->parent_pcs_ptr->aligned_width - sb_origin_x);
1996 uint32_t sb_height =
1997 MIN(scs_ptr->sb_size_pix, pcs_ptr->parent_pcs_ptr->aligned_height - sb_origin_y);
1998 // MV merge mode
1999 uint32_t y_has_coeff;
2000 uint32_t u_has_coeff;
2001 uint32_t v_has_coeff;
2002 uint32_t count_non_zero_coeffs[3];
2003 uint16_t eobs[MAX_TXB_COUNT][3];
2004 uint64_t y_txb_coeff_bits;
2005 uint64_t cb_txb_coeff_bits;
2006 uint64_t cr_txb_coeff_bits;
2007 EncodeContext * encode_context_ptr;
2008 // Dereferencing early
2009 uint16_t tile_idx = context_ptr->tile_index;
2010 uint16_t total_tile_cnt = pcs_ptr->parent_pcs_ptr->av1_cm->tiles_info.tile_cols *
2011 pcs_ptr->parent_pcs_ptr->av1_cm->tiles_info.tile_rows;
2012 NeighborArrayUnit *ep_mode_type_neighbor_array = pcs_ptr->ep_mode_type_neighbor_array[tile_idx];
2013 NeighborArrayUnit *ep_intra_luma_mode_neighbor_array =
2014 pcs_ptr->ep_intra_luma_mode_neighbor_array[tile_idx];
2015 NeighborArrayUnit *ep_intra_chroma_mode_neighbor_array =
2016 pcs_ptr->ep_intra_chroma_mode_neighbor_array[tile_idx];
2017 NeighborArrayUnit *ep_mv_neighbor_array = pcs_ptr->ep_mv_neighbor_array[tile_idx];
2018 NeighborArrayUnit *ep_luma_recon_neighbor_array =
2019 is_16bit ? pcs_ptr->ep_luma_recon_neighbor_array16bit[tile_idx]
2020 : pcs_ptr->ep_luma_recon_neighbor_array[tile_idx];
2021 NeighborArrayUnit *ep_cb_recon_neighbor_array =
2022 is_16bit ? pcs_ptr->ep_cb_recon_neighbor_array16bit[tile_idx]
2023 : pcs_ptr->ep_cb_recon_neighbor_array[tile_idx];
2024 NeighborArrayUnit *ep_cr_recon_neighbor_array =
2025 is_16bit ? pcs_ptr->ep_cr_recon_neighbor_array16bit[tile_idx]
2026 : pcs_ptr->ep_cr_recon_neighbor_array[tile_idx];
2027 NeighborArrayUnit *ep_skip_flag_neighbor_array = pcs_ptr->ep_skip_flag_neighbor_array[tile_idx];
2028
2029 EbBool dlf_enable_flag = (EbBool)pcs_ptr->parent_pcs_ptr->loop_filter_mode;
2030 encode_context_ptr =
2031 ((SequenceControlSet *)(pcs_ptr->scs_wrapper_ptr->object_ptr))->encode_context_ptr;
2032
2033 if (pcs_ptr->parent_pcs_ptr->is_used_as_reference_flag == EB_TRUE)
2034 //get the 16bit form of the input SB
2035 if (is_16bit)
2036 recon_buffer = ((EbReferenceObject *)
2037 pcs_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)
2038 ->reference_picture16bit;
2039 else
2040 recon_buffer = ((EbReferenceObject *)
2041 pcs_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)
2042 ->reference_picture;
2043 else // non ref pictures
2044 recon_buffer = is_16bit ? pcs_ptr->parent_pcs_ptr->enc_dec_ptr->recon_picture16bit_ptr : pcs_ptr->parent_pcs_ptr->enc_dec_ptr->recon_picture_ptr;
2045 if (is_16bit && scs_ptr->static_config.encoder_bit_depth > EB_8BIT) {
2046 //SB128_TODO change 10bit SB creation
2047
2048 if ((scs_ptr->static_config.ten_bit_format == 1) ||
2049 (scs_ptr->static_config.compressed_ten_bit_format == 1)) {
2050 const uint32_t input_luma_offset =
2051 ((sb_origin_y + input_picture->origin_y) * input_picture->stride_y) +
2052 (sb_origin_x + input_picture->origin_x);
2053 const uint32_t input_cb_offset =
2054 (((sb_origin_y + input_picture->origin_y) >> 1) * input_picture->stride_cb) +
2055 ((sb_origin_x + input_picture->origin_x) >> 1);
2056 const uint32_t input_cr_offset =
2057 (((sb_origin_y + input_picture->origin_y) >> 1) * input_picture->stride_cr) +
2058 ((sb_origin_x + input_picture->origin_x) >> 1);
2059 const uint16_t luma_2bit_width = input_picture->width / 4;
2060 const uint16_t chroma_2bit_width = input_picture->width / 8;
2061
2062 compressed_pack_sb(input_picture->buffer_y + input_luma_offset,
2063 input_picture->stride_y,
2064 input_picture->buffer_bit_inc_y + sb_origin_y * luma_2bit_width +
2065 (sb_origin_x / 4) * sb_height,
2066 sb_width / 4,
2067 (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_y,
2068 context_ptr->input_sample16bit_buffer->stride_y,
2069 sb_width,
2070 sb_height);
2071
2072 compressed_pack_sb(input_picture->buffer_cb + input_cb_offset,
2073 input_picture->stride_cb,
2074 input_picture->buffer_bit_inc_cb +
2075 sb_origin_y / 2 * chroma_2bit_width +
2076 (sb_origin_x / 8) * (sb_height / 2),
2077 sb_width / 8,
2078 (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cb,
2079 context_ptr->input_sample16bit_buffer->stride_cb,
2080 sb_width >> 1,
2081 sb_height >> 1);
2082
2083 compressed_pack_sb(input_picture->buffer_cr + input_cr_offset,
2084 input_picture->stride_cr,
2085 input_picture->buffer_bit_inc_cr +
2086 sb_origin_y / 2 * chroma_2bit_width +
2087 (sb_origin_x / 8) * (sb_height / 2),
2088 sb_width / 8,
2089 (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cr,
2090 context_ptr->input_sample16bit_buffer->stride_cr,
2091 sb_width >> 1,
2092 sb_height >> 1);
2093 } else {
2094 const uint32_t input_luma_offset =
2095 ((sb_origin_y + input_picture->origin_y) * input_picture->stride_y) +
2096 (sb_origin_x + input_picture->origin_x);
2097 const uint32_t input_bit_inc_luma_offset =
2098 ((sb_origin_y + input_picture->origin_y) * input_picture->stride_bit_inc_y) +
2099 (sb_origin_x + input_picture->origin_x);
2100 const uint32_t input_cb_offset =
2101 (((sb_origin_y + input_picture->origin_y) >> 1) * input_picture->stride_cb) +
2102 ((sb_origin_x + input_picture->origin_x) >> 1);
2103 const uint32_t input_bit_inc_cb_offset =
2104 (((sb_origin_y + input_picture->origin_y) >> 1) *
2105 input_picture->stride_bit_inc_cb) +
2106 ((sb_origin_x + input_picture->origin_x) >> 1);
2107 const uint32_t input_cr_offset =
2108 (((sb_origin_y + input_picture->origin_y) >> 1) * input_picture->stride_cr) +
2109 ((sb_origin_x + input_picture->origin_x) >> 1);
2110 const uint32_t input_bit_inc_cr_offset = (((sb_origin_y + input_picture->origin_y) >> 1) *
2111 input_picture->stride_bit_inc_cr) +
2112 ((sb_origin_x + input_picture->origin_x) >> 1);
2113
2114 pack2d_src(input_picture->buffer_y + input_luma_offset,
2115 input_picture->stride_y,
2116 input_picture->buffer_bit_inc_y + input_bit_inc_luma_offset,
2117 input_picture->stride_bit_inc_y,
2118 (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_y,
2119 context_ptr->input_sample16bit_buffer->stride_y,
2120 sb_width,
2121 sb_height);
2122
2123 pack2d_src(input_picture->buffer_cb + input_cb_offset,
2124 input_picture->stride_cr,
2125 input_picture->buffer_bit_inc_cb + input_bit_inc_cb_offset,
2126 input_picture->stride_bit_inc_cr,
2127 (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cb,
2128 context_ptr->input_sample16bit_buffer->stride_cb,
2129 sb_width >> 1,
2130 sb_height >> 1);
2131
2132 pack2d_src(input_picture->buffer_cr + input_cr_offset,
2133 input_picture->stride_cr,
2134 input_picture->buffer_bit_inc_cr + input_bit_inc_cr_offset,
2135 input_picture->stride_bit_inc_cr,
2136 (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cr,
2137 context_ptr->input_sample16bit_buffer->stride_cr,
2138 sb_width >> 1,
2139 sb_height >> 1);
2140 // PAD the packed source in incomplete sb up to max SB size
2141 pad_input_picture_16bit(
2142 (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_y,
2143 context_ptr->input_sample16bit_buffer->stride_y,
2144 sb_width,
2145 sb_height,
2146 scs_ptr->sb_size_pix - sb_width,
2147 scs_ptr->sb_size_pix - sb_height);
2148 pad_input_picture_16bit(
2149 (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cb,
2150 context_ptr->input_sample16bit_buffer->stride_cb,
2151 sb_width >> 1,
2152 sb_height >> 1,
2153 (scs_ptr->sb_size_pix- sb_width )>>1,
2154 (scs_ptr->sb_size_pix - sb_height)>>1);
2155
2156 pad_input_picture_16bit(
2157 (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cr,
2158 context_ptr->input_sample16bit_buffer->stride_cr,
2159 sb_width >> 1,
2160 sb_height >> 1,
2161 (scs_ptr->sb_size_pix - sb_width )>>1,
2162 (scs_ptr->sb_size_pix - sb_height)>>1);
2163 }
2164
2165 if (context_ptr->md_context->hbd_mode_decision == 0)
2166 store16bit_input_src(context_ptr->input_sample16bit_buffer,
2167 pcs_ptr,
2168 sb_origin_x,
2169 sb_origin_y,
2170 scs_ptr->sb_size_pix,
2171 scs_ptr->sb_size_pix);
2172 }
2173
2174 if (is_16bit && scs_ptr->static_config.encoder_bit_depth == EB_8BIT) {
2175 const uint32_t input_luma_offset =
2176 ((sb_origin_y + input_picture->origin_y) * input_picture->stride_y) +
2177 (sb_origin_x + input_picture->origin_x);
2178 const uint32_t input_cb_offset =
2179 (((sb_origin_y + input_picture->origin_y) >> 1) * input_picture->stride_cb) +
2180 ((sb_origin_x + input_picture->origin_x) >> 1);
2181 const uint32_t input_cr_offset =
2182 (((sb_origin_y + input_picture->origin_y) >> 1) * input_picture->stride_cr) +
2183 ((sb_origin_x + input_picture->origin_x) >> 1);
2184
2185 sb_width =
2186 ((sb_width < MIN_SB_SIZE) || ((sb_width > MIN_SB_SIZE) && (sb_width < MAX_SB_SIZE)))
2187 ? MIN(scs_ptr->sb_size_pix,
2188 (pcs_ptr->parent_pcs_ptr->aligned_width + scs_ptr->right_padding) -
2189 sb_origin_x)
2190 : sb_width;
2191 sb_height =
2192 ((sb_height < MIN_SB_SIZE) || ((sb_height > MIN_SB_SIZE) && (sb_height < MAX_SB_SIZE)))
2193 ? MIN(scs_ptr->sb_size_pix,
2194 (pcs_ptr->parent_pcs_ptr->aligned_height + scs_ptr->bot_padding) -
2195 sb_origin_y)
2196 : sb_height;
2197
2198 // PACK Y
2199 uint16_t *buf_16bit = (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_y;
2200 uint8_t * buf_8bit = input_picture->buffer_y + input_luma_offset;
2201 svt_convert_8bit_to_16bit(buf_8bit,
2202 input_picture->stride_y,
2203 buf_16bit,
2204 context_ptr->input_sample16bit_buffer->stride_y,
2205 sb_width,
2206 sb_height);
2207
2208 // PACK CB
2209 buf_16bit = (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cb;
2210 buf_8bit = input_picture->buffer_cb + input_cb_offset;
2211 svt_convert_8bit_to_16bit(buf_8bit,
2212 input_picture->stride_cb,
2213 buf_16bit,
2214 context_ptr->input_sample16bit_buffer->stride_cb,
2215 sb_width >> 1,
2216 sb_height >> 1);
2217
2218 // PACK CR
2219 buf_16bit = (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cr;
2220 buf_8bit = input_picture->buffer_cr + input_cr_offset;
2221 svt_convert_8bit_to_16bit(buf_8bit,
2222 input_picture->stride_cr,
2223 buf_16bit,
2224 context_ptr->input_sample16bit_buffer->stride_cr,
2225 sb_width >> 1,
2226 sb_height >> 1);
2227 }
2228 context_ptr->coded_area_sb = 0;
2229 context_ptr->coded_area_sb_uv = 0;
2230
2231 if (dlf_enable_flag && pcs_ptr->parent_pcs_ptr->loop_filter_mode == 1 && total_tile_cnt == 1) {
2232 if (sb_addr == 0) {
2233 svt_av1_loop_filter_init(pcs_ptr);
2234
2235 svt_av1_pick_filter_level(
2236 (EbPictureBufferDesc *)pcs_ptr->parent_pcs_ptr->enhanced_picture_ptr,
2237 pcs_ptr,
2238 LPF_PICK_FROM_Q);
2239
2240 svt_av1_loop_filter_frame_init(
2241 &pcs_ptr->parent_pcs_ptr->frm_hdr, &pcs_ptr->parent_pcs_ptr->lf_info, 0, 3);
2242 }
2243 }
2244
2245 uint32_t final_blk_itr = 0;
2246 // CU Loop
2247 uint32_t blk_it = 0;
2248 while (blk_it < scs_ptr->max_block_cnt) {
2249 BlkStruct *blk_ptr = context_ptr->blk_ptr =
2250 &context_ptr->md_context->md_blk_arr_nsq[blk_it];
2251 //At the boundary when it's not a complete super block.
2252 //We may only use part of the blocks in MD.
2253 //And the mds_idx of the parent block is not set properly
2254 //And it will generate the wrong cdf ctx and influence the MD for the next SB
2255 blk_ptr->mds_idx = blk_it;
2256 PartitionType part = blk_ptr->part;
2257
2258 const BlockGeom *blk_geom = context_ptr->blk_geom = get_blk_geom_mds(blk_it);
2259 sb_ptr->cu_partition_array[blk_it] = context_ptr->md_context->md_blk_arr_nsq[blk_it].part;
2260 if (pcs_ptr->cdf_ctrl.update_se) {
2261 blk_ptr->av1xd->tile_ctx = &pcs_ptr->ec_ctx_array[sb_addr];
2262 // Update the partition stats
2263 update_part_stats(pcs_ptr,
2264 blk_ptr,
2265 tile_idx,
2266 (sb_origin_y + blk_geom->origin_y) >> MI_SIZE_LOG2,
2267 (sb_origin_x + blk_geom->origin_x) >> MI_SIZE_LOG2);
2268 }
2269 if ((use_input_stat(scs_ptr) || scs_ptr->lap_enabled ) &&
2270 blk_it == 0 && sb_origin_x == 0 && blk_geom->origin_x == 0 && sb_origin_y == 0 && blk_geom->origin_y == 0) {
2271 pcs_ptr->parent_pcs_ptr->pcs_total_rate = 0;
2272 }
2273 if (part != PARTITION_SPLIT && pcs_ptr->parent_pcs_ptr->sb_geom[sb_addr].block_is_allowed[blk_it]) {
2274 int32_t offset_d1 = ns_blk_offset[(int32_t)part]; //blk_ptr->best_d1_blk; // TOCKECK
2275 int32_t num_d1_block =
2276 ns_blk_num[(int32_t)part]; // context_ptr->blk_geom->totns; // TOCKECK
2277
2278 // for (int32_t d1_itr = blk_it; d1_itr < blk_it + num_d1_block; d1_itr++) {
2279 for (int32_t d1_itr = (int32_t)blk_it + offset_d1;
2280 d1_itr < (int32_t)blk_it + offset_d1 + num_d1_block;
2281 d1_itr++) {
2282 blk_geom = context_ptr->blk_geom = get_blk_geom_mds(d1_itr);
2283
2284 // PU Stack variables
2285 PredictionUnit * pu_ptr = (PredictionUnit *)NULL; // done
2286 EbPictureBufferDesc *residual_buffer = context_ptr->residual_buffer;
2287 EbPictureBufferDesc *transform_buffer = context_ptr->transform_buffer;
2288
2289 EbPictureBufferDesc *inverse_quant_buffer = context_ptr->inverse_quant_buffer;
2290
2291 blk_ptr = context_ptr->blk_ptr =
2292 &context_ptr->md_context->md_blk_arr_nsq[d1_itr];
2293
2294 context_ptr->blk_origin_x = (uint16_t)(sb_origin_x + blk_geom->origin_x);
2295 context_ptr->blk_origin_y = (uint16_t)(sb_origin_y + blk_geom->origin_y);
2296 if (context_ptr->md_context->ep_use_md_skip_decision)
2297 context_ptr->md_skip_blk = !blk_ptr->block_has_coeff;
2298 else
2299 context_ptr->md_skip_blk =
2300 context_ptr->md_context->blk_skip_decision
2301 ? ((blk_ptr->prediction_mode_flag == INTRA_MODE || blk_ptr->block_has_coeff)
2302 ? 0
2303 : 1)
2304 : 0;
2305 blk_ptr->block_has_coeff = 0;
2306
2307 // if(pcs_ptr->picture_number==4 && context_ptr->blk_origin_x==0 && context_ptr->blk_origin_y==0)
2308 // SVT_LOG("CHEDD");
2309 uint32_t coded_area_org = context_ptr->coded_area_sb;
2310 uint32_t coded_area_org_uv = context_ptr->coded_area_sb_uv;
2311 // for now, segmentation independent of sharpness/delta QP.
2312 if (pcs_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled) {
2313 apply_segmentation_based_quantization(blk_geom, pcs_ptr, sb_ptr, blk_ptr);
2314 sb_ptr->qindex = blk_ptr->qindex;
2315 } else {
2316 blk_ptr->qindex = sb_ptr->qindex;
2317 }
2318 svt_block_on_mutex(pcs_ptr->parent_pcs_ptr->pcs_total_rate_mutex);
2319 pcs_ptr->parent_pcs_ptr->pcs_total_rate += blk_ptr->total_rate;
2320 svt_release_mutex(pcs_ptr->parent_pcs_ptr->pcs_total_rate_mutex);
2321 if (blk_ptr->prediction_mode_flag == INTRA_MODE) {
2322 context_ptr->is_inter = blk_ptr->use_intrabc;
2323 if (scs_ptr->static_config.encoder_bit_depth > EB_8BIT &&
2324 pcs_ptr->hbd_mode_decision == 0 &&
2325 blk_ptr->palette_info.pmi.palette_size[0] > 0) {
2326 //MD was done on 8bit, scale palette colors to 10bit
2327 for (uint8_t col = 0; col < blk_ptr->palette_info.pmi.palette_size[0];
2328 col++)
2329 blk_ptr->palette_info.pmi.palette_colors[col] *= 4;
2330 }
2331 // *Note - Transforms are the same size as predictions
2332 // Partition Loop
2333 context_ptr->txb_itr = 0;
2334 // Transform partitioning path (INTRA Luma/Chroma)
2335 if (blk_ptr->use_intrabc == 0) {
2336 // Set the PU Loop Variables
2337 pu_ptr = blk_ptr->prediction_unit_array;
2338
2339 perform_intra_coding_loop(
2340 pcs_ptr, sb_ptr, sb_addr, blk_ptr, pu_ptr, context_ptr);
2341
2342 // Update the Intra-specific Neighbor Arrays
2343 encode_pass_update_intra_mode_neighbor_arrays(
2344 ep_mode_type_neighbor_array,
2345 ep_intra_luma_mode_neighbor_array,
2346 ep_intra_chroma_mode_neighbor_array,
2347 (uint8_t)blk_ptr->pred_mode,
2348 (uint8_t)pu_ptr->intra_chroma_mode,
2349 context_ptr->blk_origin_x,
2350 context_ptr->blk_origin_y,
2351 context_ptr->blk_geom->bwidth,
2352 context_ptr->blk_geom->bheight,
2353 context_ptr->blk_geom->bwidth_uv,
2354 context_ptr->blk_geom->bheight_uv,
2355 blk_geom->has_uv ? PICTURE_BUFFER_DESC_FULL_MASK
2356 : PICTURE_BUFFER_DESC_LUMA_MASK);
2357
2358 }
2359 // Transform partitioning free path (except the 128x128 case)
2360 else {
2361 // Set the PU Loop Variables
2362 pu_ptr = blk_ptr->prediction_unit_array;
2363
2364 {
2365 //keep final useful mvp for entropy
2366 svt_memcpy(blk_ptr->av1xd->final_ref_mv_stack,
2367 context_ptr->md_context
2368 ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
2369 .ed_ref_mv_stack[blk_ptr->prediction_unit_array[0]
2370 .ref_frame_type],
2371 sizeof(CandidateMv) * MAX_REF_MV_STACK_SIZE);
2372 {
2373 uint8_t ref_frame_type = blk_ptr->prediction_unit_array[0].ref_frame_type;
2374 MacroBlockD *xd = blk_ptr->av1xd;
2375 if (blk_ptr->pred_mode == NEWMV || blk_ptr->pred_mode == NEW_NEWMV) {
2376 int32_t idx;
2377 for (idx = 0; idx < 2; ++idx) {
2378 if (xd->ref_mv_count[ref_frame_type] > idx + 1)
2379 blk_ptr->drl_ctx[idx] = av1_drl_ctx(xd->final_ref_mv_stack, idx);
2380 else
2381 blk_ptr->drl_ctx[idx] = -1;
2382 }
2383 }
2384
2385 if (have_nearmv_in_inter_mode(blk_ptr->pred_mode)) {
2386 int32_t idx;
2387 // TODO(jingning): Temporary solution to compensate the NEARESTMV offset.
2388 for (idx = 1; idx < 3; ++idx) {
2389 if (xd->ref_mv_count[ref_frame_type] > idx + 1)
2390 blk_ptr->drl_ctx_near[idx - 1] = av1_drl_ctx(xd->final_ref_mv_stack, idx);
2391 else
2392 blk_ptr->drl_ctx_near[idx - 1] = -1;
2393 }
2394 }
2395 }
2396
2397
2398 // Set MvUnit
2399 context_ptr->mv_unit.pred_direction =
2400 (uint8_t)pu_ptr->inter_pred_direction_index;
2401 context_ptr->mv_unit.mv[REF_LIST_0].mv_union =
2402 pu_ptr->mv[REF_LIST_0].mv_union;
2403 context_ptr->mv_unit.mv[REF_LIST_1].mv_union =
2404 pu_ptr->mv[REF_LIST_1].mv_union;
2405
2406 EbPictureBufferDesc *ref_pic_list0 =
2407 ((EbReferenceObject *)pcs_ptr->parent_pcs_ptr
2408 ->reference_picture_wrapper_ptr->object_ptr)
2409 ->reference_picture;
2410
2411 if (is_16bit)
2412 ref_pic_list0 =
2413 ((EbReferenceObject *)pcs_ptr->parent_pcs_ptr
2414 ->reference_picture_wrapper_ptr->object_ptr)
2415 ->reference_picture16bit;
2416
2417 if (is_16bit && !(scs_ptr->static_config.superres_mode > SUPERRES_NONE)) {
2418 av1_inter_prediction_16bit_pipeline(
2419 pcs_ptr,
2420 blk_ptr->interp_filters,
2421 blk_ptr,
2422 blk_ptr->prediction_unit_array->ref_frame_type,
2423 &context_ptr->mv_unit,
2424 1, // use_intrabc,
2425 SIMPLE_TRANSLATION,
2426 0,
2427 0,
2428 1,
2429 &blk_ptr->interinter_comp,
2430 ep_luma_recon_neighbor_array,
2431 ep_cb_recon_neighbor_array,
2432 ep_cr_recon_neighbor_array,
2433 blk_ptr->is_interintra_used,
2434 blk_ptr->interintra_mode,
2435 blk_ptr->use_wedge_interintra,
2436 blk_ptr->interintra_wedge_index,
2437 context_ptr->blk_origin_x,
2438 context_ptr->blk_origin_y,
2439 blk_geom->bwidth,
2440 blk_geom->bheight,
2441 ref_pic_list0,
2442 0,
2443 recon_buffer,
2444 context_ptr->blk_origin_x,
2445 context_ptr->blk_origin_y,
2446 EB_TRUE,
2447 (uint8_t)scs_ptr->static_config.encoder_bit_depth);
2448 } else {
2449 av1_inter_prediction(
2450 scs_ptr,
2451 pcs_ptr,
2452 blk_ptr->interp_filters,
2453 blk_ptr,
2454 blk_ptr->prediction_unit_array->ref_frame_type,
2455 &context_ptr->mv_unit,
2456 1, // use_intrabc,
2457 SIMPLE_TRANSLATION,
2458 0,
2459 0,
2460 1,
2461 &blk_ptr->interinter_comp,
2462 ep_luma_recon_neighbor_array,
2463 ep_cb_recon_neighbor_array,
2464 ep_cr_recon_neighbor_array,
2465 blk_ptr->is_interintra_used,
2466 blk_ptr->interintra_mode,
2467 blk_ptr->use_wedge_interintra,
2468 blk_ptr->interintra_wedge_index,
2469 context_ptr->blk_origin_x,
2470 context_ptr->blk_origin_y,
2471 blk_geom->bwidth,
2472 blk_geom->bheight,
2473 ref_pic_list0,
2474 0,
2475 recon_buffer,
2476 context_ptr->blk_origin_x,
2477 context_ptr->blk_origin_y,
2478 EB_TRUE,
2479 (uint8_t)scs_ptr->static_config.encoder_bit_depth);
2480 }
2481 }
2482 // Initialize the Transform Loop
2483
2484 context_ptr->txb_itr = 0;
2485 y_has_coeff = 0;
2486 u_has_coeff = 0;
2487 v_has_coeff = 0;
2488
2489 uint32_t totTu = context_ptr->blk_geom->txb_count[blk_ptr->tx_depth];
2490
2491 for (uint8_t tuIt = 0; tuIt < totTu; tuIt++) {
2492 context_ptr->txb_itr = tuIt;
2493 uint8_t uv_pass = blk_ptr->tx_depth && tuIt ? 0 : 1;
2494
2495 uint16_t txb_origin_x = context_ptr->blk_origin_x + context_ptr->blk_geom->tx_org_x[1][blk_ptr->tx_depth][tuIt] - context_ptr->blk_geom->origin_x;
2496 uint16_t txb_origin_y = context_ptr->blk_origin_y + context_ptr->blk_geom->tx_org_y[1][blk_ptr->tx_depth][tuIt] - context_ptr->blk_geom->origin_y;
2497
2498 context_ptr->md_context->luma_txb_skip_context = 0;
2499 context_ptr->md_context->luma_dc_sign_context = 0;
2500 get_txb_ctx(
2501 pcs_ptr,
2502 COMPONENT_LUMA,
2503 pcs_ptr->ep_luma_dc_sign_level_coeff_neighbor_array[tile_idx],
2504 txb_origin_x,
2505 txb_origin_y,
2506 context_ptr->blk_geom->bsize,
2507 context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
2508 &context_ptr->md_context->luma_txb_skip_context,
2509 &context_ptr->md_context->luma_dc_sign_context);
2510
2511
2512 if (context_ptr->blk_geom->has_uv && uv_pass) {
2513 context_ptr->md_context->cb_txb_skip_context = 0;
2514 context_ptr->md_context->cb_dc_sign_context = 0;
2515 get_txb_ctx(
2516 pcs_ptr,
2517 COMPONENT_CHROMA,
2518 pcs_ptr->ep_cb_dc_sign_level_coeff_neighbor_array[tile_idx],
2519 ROUND_UV(txb_origin_x) >> 1,
2520 ROUND_UV(txb_origin_y) >> 1,
2521 context_ptr->blk_geom->bsize_uv,
2522 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
2523 &context_ptr->md_context->cb_txb_skip_context,
2524 &context_ptr->md_context->cb_dc_sign_context);
2525
2526 context_ptr->md_context->cr_txb_skip_context = 0;
2527 context_ptr->md_context->cr_dc_sign_context = 0;
2528 get_txb_ctx(
2529 pcs_ptr,
2530 COMPONENT_CHROMA,
2531 pcs_ptr->ep_cr_dc_sign_level_coeff_neighbor_array[tile_idx],
2532 ROUND_UV(txb_origin_x) >> 1,
2533 ROUND_UV(txb_origin_y) >> 1,
2534 context_ptr->blk_geom->bsize_uv,
2535 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
2536 &context_ptr->md_context->cr_txb_skip_context,
2537 &context_ptr->md_context->cr_dc_sign_context);
2538 }
2539 // Encode Transform Unit -INTRA-
2540 {
2541
2542 av1_encode_loop_func_table[is_16bit](
2543 pcs_ptr,
2544 context_ptr,
2545 sb_ptr,
2546 txb_origin_x,
2547 txb_origin_y,
2548 recon_buffer,
2549 coeff_buffer_sb,
2550 residual_buffer,
2551 transform_buffer,
2552 inverse_quant_buffer,
2553 count_non_zero_coeffs,
2554 (context_ptr->blk_geom->has_uv && uv_pass) ? PICTURE_BUFFER_DESC_FULL_MASK :
2555 PICTURE_BUFFER_DESC_LUMA_MASK,
2556 eobs[context_ptr->txb_itr]);
2557
2558 if (pcs_ptr->cdf_ctrl.update_coef) {
2559 ModeDecisionCandidateBuffer **candidate_buffer_ptr_array_base =
2560 context_ptr->md_context->candidate_buffer_ptr_array;
2561 ModeDecisionCandidateBuffer **candidate_buffer_ptr_array =
2562 &(candidate_buffer_ptr_array_base[0]);
2563 ModeDecisionCandidateBuffer *candidate_buffer;
2564
2565 // Set the Candidate Buffer
2566 candidate_buffer = candidate_buffer_ptr_array[0];
2567 // Rate estimation function uses the values from CandidatePtr. The right values are copied from blk_ptr to CandidatePtr
2568 candidate_buffer->candidate_ptr->type =
2569 blk_ptr->prediction_mode_flag;
2570 candidate_buffer->candidate_ptr->pred_mode = blk_ptr->pred_mode;
2571 candidate_buffer->candidate_ptr->filter_intra_mode =
2572 blk_ptr->filter_intra_mode;
2573 const uint32_t coeff1d_offset = context_ptr->coded_area_sb;
2574
2575 av1_txb_estimate_coeff_bits(
2576 context_ptr->md_context,
2577 1, //allow_update_cdf,
2578 &pcs_ptr->ec_ctx_array[sb_addr],
2579 pcs_ptr,
2580 candidate_buffer,
2581 coeff1d_offset,
2582 context_ptr->coded_area_sb_uv,
2583 coeff_buffer_sb,
2584 eobs[context_ptr->txb_itr][0],
2585 eobs[context_ptr->txb_itr][1],
2586 eobs[context_ptr->txb_itr][2],
2587 &y_txb_coeff_bits,
2588 &cb_txb_coeff_bits,
2589 &cr_txb_coeff_bits,
2590 context_ptr->blk_geom
2591 ->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
2592 context_ptr->blk_geom
2593 ->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
2594 blk_ptr->txb_array[context_ptr->txb_itr]
2595 .transform_type[PLANE_TYPE_Y],
2596 blk_ptr->txb_array[context_ptr->txb_itr]
2597 .transform_type[PLANE_TYPE_UV],
2598 context_ptr->blk_geom->has_uv && uv_pass ? COMPONENT_ALL :
2599 COMPONENT_LUMA);
2600 }
2601 //intra mode
2602 av1_enc_gen_recon_func_ptr[is_16bit](
2603 context_ptr,
2604 txb_origin_x,
2605 txb_origin_y,
2606 recon_buffer,
2607 inverse_quant_buffer,
2608 context_ptr->blk_geom->has_uv && uv_pass ? PICTURE_BUFFER_DESC_FULL_MASK :
2609 PICTURE_BUFFER_DESC_LUMA_MASK,
2610 eobs[context_ptr->txb_itr]);
2611 }
2612 if (context_ptr->blk_geom->has_uv && uv_pass) {
2613 y_has_coeff |=
2614 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].y_has_coeff[context_ptr->txb_itr];
2615 u_has_coeff |=
2616 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].u_has_coeff[context_ptr->txb_itr];
2617 v_has_coeff |=
2618 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].v_has_coeff[context_ptr->txb_itr];
2619 }
2620 else
2621 y_has_coeff |=
2622 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].y_has_coeff[context_ptr->txb_itr];
2623
2624 context_ptr->coded_area_sb += blk_geom->tx_width[blk_ptr->tx_depth][tuIt] * blk_geom->tx_height[blk_ptr->tx_depth][tuIt];
2625
2626 if (context_ptr->blk_geom->has_uv && uv_pass)
2627 context_ptr->coded_area_sb_uv += blk_geom->tx_width_uv[blk_ptr->tx_depth][tuIt] * blk_geom->tx_height_uv[blk_ptr->tx_depth][tuIt];
2628
2629 // Update the luma Dc Sign Level Coeff Neighbor Array
2630 {
2631 uint8_t dcSignLevelCoeff =
2632 (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[0][context_ptr->txb_itr];
2633 neighbor_array_unit_mode_write(
2634 pcs_ptr->ep_luma_dc_sign_level_coeff_neighbor_array[tile_idx],
2635 (uint8_t*)&dcSignLevelCoeff,
2636 txb_origin_x,
2637 txb_origin_y,
2638 context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
2639 context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr],
2640 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
2641 }
2642
2643 if (context_ptr->blk_geom->has_uv && uv_pass)
2644 {
2645 // Update the cb Dc Sign Level Coeff Neighbor Array
2646 uint8_t dcSignLevelCoeff =
2647 (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[1][context_ptr->txb_itr];
2648 neighbor_array_unit_mode_write(
2649 pcs_ptr->ep_cb_dc_sign_level_coeff_neighbor_array[tile_idx],
2650 (uint8_t*)&dcSignLevelCoeff,
2651 ROUND_UV(txb_origin_x) >> 1,
2652 ROUND_UV(txb_origin_y) >> 1,
2653 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
2654 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
2655 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
2656
2657 // Update the cr DC Sign Level Coeff Neighbor Array
2658 dcSignLevelCoeff =
2659 (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[2][context_ptr->txb_itr];
2660 neighbor_array_unit_mode_write(
2661 pcs_ptr->ep_cr_dc_sign_level_coeff_neighbor_array[tile_idx],
2662 (uint8_t*)&dcSignLevelCoeff,
2663 ROUND_UV(txb_origin_x) >> 1,
2664 ROUND_UV(txb_origin_y) >> 1,
2665 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
2666 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
2667 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
2668 }
2669
2670 } // Transform Loop
2671 // Calculate Root CBF
2672 if (context_ptr->blk_geom->has_uv)
2673 blk_ptr->block_has_coeff = (y_has_coeff | u_has_coeff | v_has_coeff) ? EB_TRUE : EB_FALSE;
2674 else
2675 blk_ptr->block_has_coeff = (y_has_coeff) ? EB_TRUE : EB_FALSE;
2676
2677 // Update the Intra-specific Neighbor Arrays
2678 encode_pass_update_intra_mode_neighbor_arrays(
2679 ep_mode_type_neighbor_array,
2680 ep_intra_luma_mode_neighbor_array,
2681 ep_intra_chroma_mode_neighbor_array,
2682 (uint8_t)blk_ptr->pred_mode,
2683 (uint8_t)pu_ptr->intra_chroma_mode,
2684 context_ptr->blk_origin_x,
2685 context_ptr->blk_origin_y,
2686 context_ptr->blk_geom->bwidth,
2687 context_ptr->blk_geom->bheight,
2688 context_ptr->blk_geom->bwidth_uv,
2689 context_ptr->blk_geom->bheight_uv,
2690 blk_geom->has_uv ? PICTURE_BUFFER_DESC_FULL_MASK
2691 : PICTURE_BUFFER_DESC_LUMA_MASK);
2692
2693 // Update Recon Samples-INTRA-
2694 encode_pass_update_recon_sample_neighbour_arrays(
2695 ep_luma_recon_neighbor_array,
2696 ep_cb_recon_neighbor_array,
2697 ep_cr_recon_neighbor_array,
2698 recon_buffer,
2699 context_ptr->blk_origin_x,
2700 context_ptr->blk_origin_y,
2701 context_ptr->blk_geom->bwidth,
2702 context_ptr->blk_geom->bheight,
2703 context_ptr->blk_geom->bwidth_uv,
2704 context_ptr->blk_geom->bheight_uv,
2705 context_ptr->blk_geom->has_uv ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK,
2706 is_16bit);
2707 }
2708 }
2709
2710 // Inter
2711 else if (blk_ptr->prediction_mode_flag == INTER_MODE) {
2712 uint8_t is_inter = context_ptr->is_inter = 1;
2713 MvReferenceFrame rf[2];
2714 av1_set_ref_frame(rf, (&blk_ptr->prediction_unit_array[0])->ref_frame_type);
2715 int8_t ref_idx_l0 = get_ref_frame_idx(rf[0]);
2716 int8_t ref_idx_l1 = rf[1] == NONE_FRAME ? get_ref_frame_idx(rf[0]) : get_ref_frame_idx(rf[1]);
2717 uint8_t list_idx0, list_idx1;
2718 list_idx0 = get_list_idx(rf[0]);
2719 if (rf[1] == NONE_FRAME)
2720 list_idx1 = get_list_idx(rf[0]);
2721 else
2722 list_idx1 = get_list_idx(rf[1]);
2723 EbReferenceObject *ref_obj_0 =
2724 ref_idx_l0 >= 0
2725 ? (EbReferenceObject *)pcs_ptr->ref_pic_ptr_array[list_idx0][ref_idx_l0]
2726 ->object_ptr
2727 : (EbReferenceObject *)NULL;
2728 EbReferenceObject *ref_obj_1 =
2729 ref_idx_l1 >= 0
2730 ? (EbReferenceObject *)pcs_ptr->ref_pic_ptr_array[list_idx1][ref_idx_l1]
2731 ->object_ptr
2732 : (EbReferenceObject *)NULL;
2733 uint16_t txb_origin_x;
2734 uint16_t txb_origin_y;
2735 EbBool is_blk_skip = EB_FALSE;
2736
2737 //********************************
2738 // INTER
2739 //********************************
2740 // Perform the Merge/Skip decision if the mode coming from MD is merge. For the first CU in a row, merge will remain as is.
2741 if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].skip_mode_allowed == EB_TRUE) {
2742 is_blk_skip =
2743 md_context_ptr->md_ep_pipe_sb[blk_ptr->mds_idx].skip_cost <=
2744 md_context_ptr->md_ep_pipe_sb[blk_ptr->mds_idx].merge_cost
2745 ? 1
2746 : 0;
2747 }
2748 //keep final useful mvp for entropy
2749 svt_memcpy(blk_ptr->av1xd->final_ref_mv_stack,
2750 context_ptr->md_context
2751 ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
2752 .ed_ref_mv_stack[blk_ptr->prediction_unit_array[0].ref_frame_type],
2753 sizeof(CandidateMv) * MAX_REF_MV_STACK_SIZE);
2754
2755 // Store drl_ctx in blk to avoid storing final_ref_mv_stack for EC
2756 uint8_t ref_frame_type_tmp = blk_ptr->prediction_unit_array[0].ref_frame_type;
2757 if (blk_ptr->pred_mode == NEWMV || blk_ptr->pred_mode == NEW_NEWMV) {
2758 int32_t idx;
2759 for (idx = 0; idx < 2; ++idx) {
2760 if (blk_ptr->av1xd->ref_mv_count[ref_frame_type_tmp] > idx + 1)
2761 blk_ptr->drl_ctx[idx] = av1_drl_ctx(blk_ptr->av1xd->final_ref_mv_stack, idx);
2762 else
2763 blk_ptr->drl_ctx[idx] = -1;
2764 }
2765 }
2766
2767 if (have_nearmv_in_inter_mode(blk_ptr->pred_mode)) {
2768 int32_t idx;
2769 // TODO(jingning): Temporary solution to compensate the NEARESTMV offset.
2770 for (idx = 1; idx < 3; ++idx) {
2771 if (blk_ptr->av1xd->ref_mv_count[ref_frame_type_tmp] > idx + 1)
2772 blk_ptr->drl_ctx_near[idx - 1] = av1_drl_ctx(blk_ptr->av1xd->final_ref_mv_stack, idx);
2773 else
2774 blk_ptr->drl_ctx_near[idx - 1] = -1;
2775 }
2776 }
2777
2778 {
2779 // 1st Partition Loop
2780 pu_ptr = blk_ptr->prediction_unit_array;
2781
2782 // Set MvUnit
2783 context_ptr->mv_unit.pred_direction =
2784 (uint8_t)pu_ptr->inter_pred_direction_index;
2785 context_ptr->mv_unit.mv[REF_LIST_0].mv_union =
2786 pu_ptr->mv[REF_LIST_0].mv_union;
2787 context_ptr->mv_unit.mv[REF_LIST_1].mv_union =
2788 pu_ptr->mv[REF_LIST_1].mv_union;
2789
2790 // Inter Prediction
2791 if (pu_ptr->motion_mode == WARPED_CAUSAL) {
2792 EbPictureBufferDesc *ref_pic_list0;
2793 EbPictureBufferDesc *ref_pic_list1;
2794 if (!is_16bit) {
2795 ref_pic_list0 = ref_idx_l0 >= 0
2796 ? ref_obj_0->reference_picture
2797 : (EbPictureBufferDesc *)NULL;
2798 ref_pic_list1 = ref_idx_l1 >= 0
2799 ? ref_obj_1->reference_picture
2800 : (EbPictureBufferDesc *)NULL;
2801 }
2802 else {
2803 ref_pic_list0 = ref_idx_l0 >= 0
2804 ? ref_obj_0->reference_picture16bit
2805 : (EbPictureBufferDesc *)NULL;
2806 ref_pic_list1 = ref_idx_l1 >= 0
2807 ? ref_obj_1->reference_picture16bit
2808 : (EbPictureBufferDesc *)NULL;
2809 }
2810 warped_motion_prediction(
2811 pcs_ptr,
2812 &context_ptr->mv_unit,
2813 blk_ptr->prediction_unit_array[0].ref_frame_type,
2814 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].compound_idx,
2815 &context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].interinter_comp,
2816 context_ptr->blk_origin_x,
2817 context_ptr->blk_origin_y,
2818 blk_ptr,
2819 blk_geom,
2820 ref_pic_list0,
2821 ref_pic_list1,
2822 recon_buffer,
2823 context_ptr->blk_origin_x,
2824 context_ptr->blk_origin_y,
2825 ep_luma_recon_neighbor_array,
2826 ep_cb_recon_neighbor_array,
2827 ep_cr_recon_neighbor_array,
2828 NULL,
2829
2830 &context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].wm_params_l0,
2831 &context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].wm_params_l1,
2832 (uint8_t)scs_ptr->static_config.encoder_bit_depth,
2833 EB_TRUE,
2834 EB_TRUE);
2835 }
2836
2837 if (pu_ptr->motion_mode != WARPED_CAUSAL) {
2838 EbPictureBufferDesc *ref_pic_list0;
2839 EbPictureBufferDesc *ref_pic_list1;
2840
2841 if (!is_16bit) {
2842 ref_pic_list0 = ref_idx_l0 >= 0
2843 ? ref_obj_0->reference_picture
2844 : (EbPictureBufferDesc *)NULL;
2845 ref_pic_list1 = ref_idx_l1 >= 0
2846 ? ref_obj_1->reference_picture
2847 : (EbPictureBufferDesc *)NULL;
2848 } else {
2849 ref_pic_list0 = ref_idx_l0 >= 0
2850 ? ref_obj_0->reference_picture16bit
2851 : (EbPictureBufferDesc *)NULL;
2852 ref_pic_list1 = ref_idx_l1 >= 0
2853 ? ref_obj_1->reference_picture16bit
2854 : (EbPictureBufferDesc *)NULL;
2855 }
2856
2857
2858 if (is_16bit && !(scs_ptr->static_config.superres_mode > SUPERRES_NONE)) {
2859 av1_inter_prediction_16bit_pipeline(
2860 pcs_ptr,
2861 blk_ptr->interp_filters,
2862 blk_ptr,
2863 blk_ptr->prediction_unit_array->ref_frame_type,
2864 &context_ptr->mv_unit,
2865 0, //use_intrabc,
2866 blk_ptr->prediction_unit_array->motion_mode,
2867 0, //use_precomputed_obmc,
2868 0,
2869 blk_ptr->compound_idx,
2870 &blk_ptr->interinter_comp,
2871 ep_luma_recon_neighbor_array,
2872 ep_cb_recon_neighbor_array,
2873 ep_cr_recon_neighbor_array,
2874 blk_ptr->is_interintra_used,
2875 blk_ptr->interintra_mode,
2876 blk_ptr->use_wedge_interintra,
2877 blk_ptr->interintra_wedge_index,
2878 context_ptr->blk_origin_x,
2879 context_ptr->blk_origin_y,
2880 blk_geom->bwidth,
2881 blk_geom->bheight,
2882 ref_pic_list0,
2883 ref_pic_list1,
2884 recon_buffer,
2885 context_ptr->blk_origin_x,
2886 context_ptr->blk_origin_y,
2887 EB_TRUE,
2888 (uint8_t)scs_ptr->static_config.encoder_bit_depth);
2889 } else {
2890 av1_inter_prediction(
2891 scs_ptr,
2892 pcs_ptr,
2893 blk_ptr->interp_filters,
2894 blk_ptr,
2895 blk_ptr->prediction_unit_array->ref_frame_type,
2896 &context_ptr->mv_unit,
2897 0, //use_intrabc,
2898 blk_ptr->prediction_unit_array->motion_mode,
2899 0, //use_precomputed_obmc,
2900 0,
2901 blk_ptr->compound_idx,
2902 &blk_ptr->interinter_comp,
2903 ep_luma_recon_neighbor_array,
2904 ep_cb_recon_neighbor_array,
2905 ep_cr_recon_neighbor_array,
2906 blk_ptr->is_interintra_used,
2907 blk_ptr->interintra_mode,
2908 blk_ptr->use_wedge_interintra,
2909 blk_ptr->interintra_wedge_index,
2910
2911 context_ptr->blk_origin_x,
2912 context_ptr->blk_origin_y,
2913 blk_geom->bwidth,
2914 blk_geom->bheight,
2915 ref_pic_list0,
2916 ref_pic_list1,
2917 recon_buffer,
2918 context_ptr->blk_origin_x,
2919 context_ptr->blk_origin_y,
2920 EB_TRUE,
2921 (uint8_t)scs_ptr->static_config.encoder_bit_depth);
2922 }
2923 }
2924 }
2925 context_ptr->txb_itr = 0;
2926 // Transform Loop
2927 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].y_has_coeff[0] = EB_FALSE;
2928 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].u_has_coeff[0] = EB_FALSE;
2929 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].v_has_coeff[0] = EB_FALSE;
2930
2931 uint16_t tot_tu = context_ptr->blk_geom->txb_count[blk_ptr->tx_depth];
2932 if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].skip_mode_allowed == EB_FALSE) {
2933 for (uint16_t tu_it = 0; tu_it < tot_tu; tu_it++) {
2934 context_ptr->txb_itr = (uint8_t)tu_it;
2935 uint8_t uv_pass =
2936 blk_ptr->tx_depth && tu_it ? 0 : 1; //NM: 128x128 exeption
2937 txb_origin_x =
2938 context_ptr->blk_origin_x +
2939 context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][tu_it] -
2940 context_ptr->blk_geom->origin_x;
2941 txb_origin_y =
2942 context_ptr->blk_origin_y +
2943 context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][tu_it] -
2944 context_ptr->blk_geom->origin_y;
2945
2946 context_ptr->md_context->luma_txb_skip_context = 0;
2947 context_ptr->md_context->luma_dc_sign_context = 0;
2948 get_txb_ctx(pcs_ptr,
2949 COMPONENT_LUMA,
2950 pcs_ptr->ep_luma_dc_sign_level_coeff_neighbor_array[tile_idx],
2951 txb_origin_x,
2952 txb_origin_y,
2953 context_ptr->blk_geom->bsize,
2954 context_ptr->blk_geom
2955 ->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
2956 &context_ptr->md_context->luma_txb_skip_context,
2957 &context_ptr->md_context->luma_dc_sign_context);
2958
2959 if (context_ptr->blk_geom->has_uv && uv_pass) {
2960 context_ptr->md_context->cb_txb_skip_context = 0;
2961 context_ptr->md_context->cb_dc_sign_context = 0;
2962 get_txb_ctx(
2963 pcs_ptr,
2964 COMPONENT_CHROMA,
2965 pcs_ptr->ep_cb_dc_sign_level_coeff_neighbor_array[tile_idx],
2966 ROUND_UV(txb_origin_x) >> 1,
2967 ROUND_UV(txb_origin_y) >> 1,
2968 context_ptr->blk_geom->bsize_uv,
2969 context_ptr->blk_geom->txsize_uv[context_ptr->blk_ptr->tx_depth]
2970 [context_ptr->txb_itr],
2971 &context_ptr->md_context->cb_txb_skip_context,
2972 &context_ptr->md_context->cb_dc_sign_context);
2973
2974 context_ptr->md_context->cr_txb_skip_context = 0;
2975 context_ptr->md_context->cr_dc_sign_context = 0;
2976 get_txb_ctx(pcs_ptr,
2977 COMPONENT_CHROMA,
2978 pcs_ptr->ep_cr_dc_sign_level_coeff_neighbor_array[tile_idx],
2979 ROUND_UV(txb_origin_x) >> 1,
2980 ROUND_UV(txb_origin_y) >> 1,
2981 context_ptr->blk_geom->bsize_uv,
2982 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth]
2983 [context_ptr->txb_itr],
2984 &context_ptr->md_context->cr_txb_skip_context,
2985 &context_ptr->md_context->cr_dc_sign_context);
2986 }
2987
2988 //inter mode 1
2989 av1_encode_loop_func_table[is_16bit](
2990 pcs_ptr,
2991 context_ptr,
2992 sb_ptr,
2993 txb_origin_x, //pic org
2994 txb_origin_y,
2995 recon_buffer,
2996 coeff_buffer_sb,
2997 residual_buffer,
2998 transform_buffer,
2999 inverse_quant_buffer,
3000 count_non_zero_coeffs,
3001 context_ptr->blk_geom->has_uv && uv_pass
3002 ? PICTURE_BUFFER_DESC_FULL_MASK
3003 : PICTURE_BUFFER_DESC_LUMA_MASK,
3004 eobs[context_ptr->txb_itr]);
3005 context_ptr->md_context
3006 ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3007 .y_has_coeff[context_ptr->txb_itr] =
3008 count_non_zero_coeffs[0] != 0 ? EB_TRUE : EB_FALSE;
3009 context_ptr->md_context
3010 ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3011 .u_has_coeff[context_ptr->txb_itr] =
3012 count_non_zero_coeffs[1] != 0 ? EB_TRUE : EB_FALSE;
3013 context_ptr->md_context
3014 ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3015 .v_has_coeff[context_ptr->txb_itr] =
3016 count_non_zero_coeffs[2] != 0 ? EB_TRUE : EB_FALSE;
3017 // Update count_non_zero_coeffs after CBF decision
3018 if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].y_has_coeff[context_ptr->txb_itr] ==
3019 EB_FALSE)
3020 count_non_zero_coeffs[0] = 0;
3021 if (context_ptr->blk_geom->has_uv && uv_pass) {
3022 if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].u_has_coeff[context_ptr->txb_itr] == EB_FALSE)
3023 count_non_zero_coeffs[1] = 0;
3024 if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].v_has_coeff[context_ptr->txb_itr] == EB_FALSE)
3025 count_non_zero_coeffs[2] = 0;
3026 }
3027
3028 // Update TU count_non_zero_coeffs
3029 blk_ptr->txb_array[context_ptr->txb_itr].nz_coef_count[0] =
3030 (uint16_t)count_non_zero_coeffs[0];
3031 blk_ptr->txb_array[context_ptr->txb_itr].nz_coef_count[1] =
3032 (uint16_t)count_non_zero_coeffs[1];
3033 blk_ptr->txb_array[context_ptr->txb_itr].nz_coef_count[2] =
3034 (uint16_t)count_non_zero_coeffs[2];
3035 if (pcs_ptr->cdf_ctrl.update_coef) {
3036 ModeDecisionCandidateBuffer **candidate_buffer_ptr_array_base =
3037 context_ptr->md_context->candidate_buffer_ptr_array;
3038 ModeDecisionCandidateBuffer **candidate_buffer_ptr_array =
3039 &(candidate_buffer_ptr_array_base[0]);
3040 ModeDecisionCandidateBuffer *candidate_buffer;
3041
3042 // Set the Candidate Buffer
3043 candidate_buffer = candidate_buffer_ptr_array[0];
3044 // Rate estimation function uses the values from CandidatePtr. The right values are copied from blk_ptr to CandidatePtr
3045 candidate_buffer->candidate_ptr->type =
3046 blk_ptr->prediction_mode_flag;
3047 candidate_buffer->candidate_ptr->pred_mode = blk_ptr->pred_mode;
3048 candidate_buffer->candidate_ptr->filter_intra_mode =
3049 blk_ptr->filter_intra_mode;
3050 const uint32_t coeff1d_offset = context_ptr->coded_area_sb;
3051
3052 //CHKN add updating eobs[] after CBF decision
3053 if (context_ptr->md_context
3054 ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3055 .y_has_coeff[context_ptr->txb_itr] == EB_FALSE)
3056 eobs[context_ptr->txb_itr][0] = 0;
3057 if (context_ptr->blk_geom->has_uv && uv_pass) {
3058 if (context_ptr->md_context
3059 ->md_local_blk_unit[context_ptr->blk_geom
3060 ->blkidx_mds]
3061 .u_has_coeff[context_ptr->txb_itr] == EB_FALSE)
3062 eobs[context_ptr->txb_itr][1] = 0;
3063 if (context_ptr->md_context
3064 ->md_local_blk_unit[context_ptr->blk_geom
3065 ->blkidx_mds]
3066 .v_has_coeff[context_ptr->txb_itr] == EB_FALSE)
3067 eobs[context_ptr->txb_itr][2] = 0;
3068 }
3069
3070 av1_txb_estimate_coeff_bits(
3071 context_ptr->md_context,
3072 1, //allow_update_cdf,
3073 &pcs_ptr->ec_ctx_array[sb_addr],
3074 pcs_ptr,
3075 candidate_buffer,
3076 coeff1d_offset,
3077 context_ptr->coded_area_sb_uv,
3078 coeff_buffer_sb,
3079 eobs[context_ptr->txb_itr][0],
3080 eobs[context_ptr->txb_itr][1],
3081 eobs[context_ptr->txb_itr][2],
3082 &y_txb_coeff_bits,
3083 &cb_txb_coeff_bits,
3084 &cr_txb_coeff_bits,
3085 context_ptr->blk_geom
3086 ->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
3087 context_ptr->blk_geom
3088 ->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3089 blk_ptr->txb_array[context_ptr->txb_itr]
3090 .transform_type[PLANE_TYPE_Y],
3091 blk_ptr->txb_array[context_ptr->txb_itr]
3092 .transform_type[PLANE_TYPE_UV],
3093 context_ptr->blk_geom->has_uv && uv_pass ? COMPONENT_ALL
3094 : COMPONENT_LUMA);
3095 }
3096 context_ptr->coded_area_sb +=
3097 blk_geom->tx_width[blk_ptr->tx_depth][tu_it] *
3098 blk_geom->tx_height[blk_ptr->tx_depth][tu_it];
3099 if (context_ptr->blk_geom->has_uv && uv_pass)
3100 context_ptr->coded_area_sb_uv +=
3101 blk_geom->tx_width_uv[blk_ptr->tx_depth][tu_it] *
3102 blk_geom->tx_height_uv[blk_ptr->tx_depth][tu_it];
3103
3104 // Update the luma Dc Sign Level Coeff Neighbor Array
3105 {
3106 uint8_t dc_sign_level_coeff =
3107 (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[0][context_ptr->txb_itr];
3108 neighbor_array_unit_mode_write(
3109 pcs_ptr->ep_luma_dc_sign_level_coeff_neighbor_array[tile_idx],
3110 (uint8_t *)&dc_sign_level_coeff,
3111 txb_origin_x,
3112 txb_origin_y,
3113 context_ptr->blk_geom
3114 ->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
3115 context_ptr->blk_geom
3116 ->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr],
3117 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
3118 }
3119
3120 if (context_ptr->blk_geom->has_uv && uv_pass) {
3121 // Update the cb Dc Sign Level Coeff Neighbor Array
3122 uint8_t dc_sign_level_coeff =
3123 (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[1][context_ptr->txb_itr];
3124 neighbor_array_unit_mode_write(
3125 pcs_ptr->ep_cb_dc_sign_level_coeff_neighbor_array[tile_idx],
3126 (uint8_t *)&dc_sign_level_coeff,
3127 ROUND_UV(txb_origin_x) >> 1,
3128 ROUND_UV(txb_origin_y) >> 1,
3129 context_ptr->blk_geom
3130 ->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3131 context_ptr->blk_geom
3132 ->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3133 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
3134
3135 // Update the cr DC Sign Level Coeff Neighbor Array
3136 dc_sign_level_coeff =
3137 (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[2][context_ptr->txb_itr];
3138 neighbor_array_unit_mode_write(
3139 pcs_ptr->ep_cr_dc_sign_level_coeff_neighbor_array[tile_idx],
3140 (uint8_t *)&dc_sign_level_coeff,
3141 ROUND_UV(txb_origin_x) >> 1,
3142 ROUND_UV(txb_origin_y) >> 1,
3143 context_ptr->blk_geom
3144 ->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3145 context_ptr->blk_geom
3146 ->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3147 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
3148 }
3149
3150 } // Transform Loop
3151 }
3152
3153 //Set Final CU data flags after skip/Merge decision.
3154 if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].skip_mode_allowed == EB_TRUE) {
3155 blk_ptr->skip_flag = (is_blk_skip) ? EB_TRUE : EB_FALSE;
3156 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].skip_mode_allowed =
3157 (is_blk_skip) ? EB_FALSE : EB_TRUE;
3158 }
3159
3160 // Initialize the Transform Loop
3161
3162 context_ptr->txb_itr = 0;
3163 y_has_coeff = 0;
3164 u_has_coeff = 0;
3165 v_has_coeff = 0;
3166 tot_tu = context_ptr->blk_geom->txb_count[blk_ptr->tx_depth];
3167
3168 //reset coeff buffer offsets at the start of a new Tx loop
3169 context_ptr->coded_area_sb = coded_area_org;
3170 context_ptr->coded_area_sb_uv = coded_area_org_uv;
3171 for (uint16_t tu_it = 0; tu_it < tot_tu; tu_it++) {
3172 uint8_t uv_pass = blk_ptr->tx_depth && tu_it ? 0 : 1; //NM: 128x128 exeption
3173 context_ptr->txb_itr = (uint8_t)tu_it;
3174 txb_origin_x = context_ptr->blk_origin_x +
3175 (context_ptr->blk_geom
3176 ->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
3177 context_ptr->blk_geom->origin_x);
3178 txb_origin_y = context_ptr->blk_origin_y +
3179 (context_ptr->blk_geom
3180 ->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
3181 context_ptr->blk_geom->origin_y);
3182 context_ptr->md_context->luma_txb_skip_context = 0;
3183 context_ptr->md_context->luma_dc_sign_context = 0;
3184 get_txb_ctx(
3185 pcs_ptr,
3186 COMPONENT_LUMA,
3187 pcs_ptr->ep_luma_dc_sign_level_coeff_neighbor_array[tile_idx],
3188 txb_origin_x,
3189 txb_origin_y,
3190 context_ptr->blk_geom->bsize,
3191 context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
3192 &context_ptr->md_context->luma_txb_skip_context,
3193 &context_ptr->md_context->luma_dc_sign_context);
3194
3195 if (context_ptr->blk_geom->has_uv && uv_pass) {
3196 context_ptr->md_context->cb_txb_skip_context = 0;
3197 context_ptr->md_context->cb_dc_sign_context = 0;
3198 get_txb_ctx(
3199 pcs_ptr,
3200 COMPONENT_CHROMA,
3201 pcs_ptr->ep_cb_dc_sign_level_coeff_neighbor_array[tile_idx],
3202 ROUND_UV(txb_origin_x) >> 1,
3203 ROUND_UV(txb_origin_y) >> 1,
3204 context_ptr->blk_geom->bsize_uv,
3205 context_ptr->blk_geom->txsize_uv[context_ptr->blk_ptr->tx_depth]
3206 [context_ptr->txb_itr],
3207 &context_ptr->md_context->cb_txb_skip_context,
3208 &context_ptr->md_context->cb_dc_sign_context);
3209
3210 context_ptr->md_context->cr_txb_skip_context = 0;
3211 context_ptr->md_context->cr_dc_sign_context = 0;
3212 get_txb_ctx(pcs_ptr,
3213 COMPONENT_CHROMA,
3214 pcs_ptr->ep_cr_dc_sign_level_coeff_neighbor_array[tile_idx],
3215 ROUND_UV(txb_origin_x) >> 1,
3216 ROUND_UV(txb_origin_y) >> 1,
3217 context_ptr->blk_geom->bsize_uv,
3218 context_ptr->blk_geom
3219 ->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3220 &context_ptr->md_context->cr_txb_skip_context,
3221 &context_ptr->md_context->cr_dc_sign_context);
3222 }
3223 if (blk_ptr->skip_flag == EB_TRUE) {
3224 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].y_has_coeff[context_ptr->txb_itr] = EB_FALSE;
3225 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].u_has_coeff[context_ptr->txb_itr] = EB_FALSE;
3226 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].v_has_coeff[context_ptr->txb_itr] = EB_FALSE;
3227
3228 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[0][context_ptr->txb_itr] = 0;
3229 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[1][context_ptr->txb_itr] = 0;
3230 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[2][context_ptr->txb_itr] = 0;
3231 } else if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].skip_mode_allowed == EB_TRUE) {
3232
3233 //inter mode 2
3234
3235 av1_encode_loop_func_table[is_16bit](
3236 pcs_ptr,
3237 context_ptr,
3238 sb_ptr,
3239 txb_origin_x, //pic offset
3240 txb_origin_y,
3241 recon_buffer,
3242 coeff_buffer_sb,
3243 residual_buffer,
3244 transform_buffer,
3245 inverse_quant_buffer,
3246 count_non_zero_coeffs,
3247 context_ptr->blk_geom->has_uv && uv_pass
3248 ? PICTURE_BUFFER_DESC_FULL_MASK
3249 : PICTURE_BUFFER_DESC_LUMA_MASK,
3250 eobs[context_ptr->txb_itr]);
3251
3252
3253 if (pcs_ptr->cdf_ctrl.update_coef) {
3254 ModeDecisionCandidateBuffer **candidate_buffer_ptr_array_base =
3255 context_ptr->md_context->candidate_buffer_ptr_array;
3256 ModeDecisionCandidateBuffer **candidate_buffer_ptr_array =
3257 &(candidate_buffer_ptr_array_base[0]);
3258 ModeDecisionCandidateBuffer *candidate_buffer;
3259
3260 // Set the Candidate Buffer
3261 candidate_buffer = candidate_buffer_ptr_array[0];
3262 // Rate estimation function uses the values from CandidatePtr. The right values are copied from blk_ptr to CandidatePtr
3263 candidate_buffer->candidate_ptr->type =
3264 blk_ptr->prediction_mode_flag;
3265 candidate_buffer->candidate_ptr->pred_mode = blk_ptr->pred_mode;
3266 candidate_buffer->candidate_ptr->filter_intra_mode =
3267 blk_ptr->filter_intra_mode;
3268 const uint32_t coeff1d_offset = context_ptr->coded_area_sb;
3269
3270 av1_txb_estimate_coeff_bits(
3271 context_ptr->md_context,
3272 1, //allow_update_cdf,
3273 &pcs_ptr->ec_ctx_array[sb_addr],
3274 pcs_ptr,
3275 candidate_buffer,
3276 coeff1d_offset,
3277 context_ptr->coded_area_sb_uv,
3278 coeff_buffer_sb,
3279 eobs[context_ptr->txb_itr][0],
3280 eobs[context_ptr->txb_itr][1],
3281 eobs[context_ptr->txb_itr][2],
3282 &y_txb_coeff_bits,
3283 &cb_txb_coeff_bits,
3284 &cr_txb_coeff_bits,
3285 context_ptr->blk_geom
3286 ->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
3287 context_ptr->blk_geom
3288 ->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3289 blk_ptr->txb_array[context_ptr->txb_itr]
3290 .transform_type[PLANE_TYPE_Y],
3291 blk_ptr->txb_array[context_ptr->txb_itr]
3292 .transform_type[PLANE_TYPE_UV],
3293 context_ptr->blk_geom->has_uv && uv_pass ? COMPONENT_ALL
3294 : COMPONENT_LUMA);
3295 }
3296 }
3297 if (context_ptr->blk_geom->has_uv && uv_pass) {
3298 blk_ptr->block_has_coeff =
3299 blk_ptr->block_has_coeff |
3300 context_ptr->md_context
3301 ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3302 .y_has_coeff[context_ptr->txb_itr] |
3303 context_ptr->md_context
3304 ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3305 .u_has_coeff[context_ptr->txb_itr] |
3306 context_ptr->md_context
3307 ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3308 .v_has_coeff[context_ptr->txb_itr];
3309 } else {
3310 blk_ptr->block_has_coeff =
3311 blk_ptr->block_has_coeff |
3312 context_ptr->md_context
3313 ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3314 .y_has_coeff[context_ptr->txb_itr];
3315 }
3316
3317 //inter mode
3318 av1_enc_gen_recon_func_ptr[is_16bit](
3319 context_ptr,
3320 txb_origin_x, //pic offset
3321 txb_origin_y,
3322 recon_buffer,
3323 inverse_quant_buffer,
3324 context_ptr->blk_geom->has_uv && uv_pass
3325 ? PICTURE_BUFFER_DESC_FULL_MASK
3326 : PICTURE_BUFFER_DESC_LUMA_MASK,
3327 eobs[context_ptr->txb_itr]);
3328
3329 if (context_ptr->blk_geom->has_uv && uv_pass) {
3330 y_has_coeff |=
3331 context_ptr->md_context
3332 ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3333 .y_has_coeff[context_ptr->txb_itr];
3334 u_has_coeff |=
3335 context_ptr->md_context
3336 ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3337 .u_has_coeff[context_ptr->txb_itr];
3338 v_has_coeff |=
3339 context_ptr->md_context
3340 ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3341 .v_has_coeff[context_ptr->txb_itr];
3342 } else
3343 y_has_coeff |=
3344 context_ptr->md_context
3345 ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3346 .y_has_coeff[context_ptr->txb_itr];
3347
3348 context_ptr->coded_area_sb += blk_geom->tx_width[blk_ptr->tx_depth][tu_it] *
3349 blk_geom->tx_height[blk_ptr->tx_depth][tu_it];
3350
3351 if (context_ptr->blk_geom->has_uv && uv_pass)
3352 context_ptr->coded_area_sb_uv +=
3353 blk_geom->tx_width_uv[blk_ptr->tx_depth][tu_it] *
3354 blk_geom->tx_height_uv[blk_ptr->tx_depth][tu_it];
3355
3356 // Update the luma Dc Sign Level Coeff Neighbor Array
3357 {
3358 uint8_t dc_sign_level_coeff =
3359 (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[0][context_ptr->txb_itr];
3360 neighbor_array_unit_mode_write(
3361 pcs_ptr->ep_luma_dc_sign_level_coeff_neighbor_array[tile_idx],
3362 (uint8_t *)&dc_sign_level_coeff,
3363 txb_origin_x,
3364 txb_origin_y,
3365 context_ptr->blk_geom
3366 ->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
3367 context_ptr->blk_geom
3368 ->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr],
3369 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
3370 }
3371
3372 // Update the cb Dc Sign Level Coeff Neighbor Array
3373 if (context_ptr->blk_geom->has_uv && uv_pass) {
3374 uint8_t dc_sign_level_coeff =
3375 (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[1][context_ptr->txb_itr];
3376 neighbor_array_unit_mode_write(
3377 pcs_ptr->ep_cb_dc_sign_level_coeff_neighbor_array[tile_idx],
3378 (uint8_t *)&dc_sign_level_coeff,
3379 ROUND_UV(txb_origin_x) >> 1,
3380 ROUND_UV(txb_origin_y) >> 1,
3381 context_ptr->blk_geom
3382 ->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3383 context_ptr->blk_geom
3384 ->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3385 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
3386 // Update the cr DC Sign Level Coeff Neighbor Array
3387 dc_sign_level_coeff =
3388 (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[2][context_ptr->txb_itr];
3389 neighbor_array_unit_mode_write(
3390 pcs_ptr->ep_cr_dc_sign_level_coeff_neighbor_array[tile_idx],
3391 (uint8_t *)&dc_sign_level_coeff,
3392 ROUND_UV(txb_origin_x) >> 1,
3393 ROUND_UV(txb_origin_y) >> 1,
3394 context_ptr->blk_geom
3395 ->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3396 context_ptr->blk_geom
3397 ->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3398 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
3399 }
3400
3401 } // Transform Loop
3402
3403 // Calculate Root CBF
3404 if (context_ptr->blk_geom->has_uv)
3405 blk_ptr->block_has_coeff =
3406 (y_has_coeff | u_has_coeff | v_has_coeff) ? EB_TRUE : EB_FALSE;
3407 else
3408 blk_ptr->block_has_coeff = (y_has_coeff) ? EB_TRUE : EB_FALSE;
3409
3410 // Force Skip if MergeFlag == TRUE && RootCbf == 0
3411
3412 if (blk_ptr->skip_flag == EB_FALSE &&
3413 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].skip_mode_allowed == EB_TRUE &&
3414 blk_ptr->block_has_coeff == EB_FALSE) {
3415 blk_ptr->skip_flag = EB_TRUE;
3416 }
3417
3418 {
3419 // Set the PU Loop Variables
3420 pu_ptr = blk_ptr->prediction_unit_array;
3421
3422 // Set MvUnit
3423 context_ptr->mv_unit.pred_direction =
3424 (uint8_t)pu_ptr->inter_pred_direction_index;
3425 context_ptr->mv_unit.mv[REF_LIST_0].mv_union =
3426 pu_ptr->mv[REF_LIST_0].mv_union;
3427 context_ptr->mv_unit.mv[REF_LIST_1].mv_union =
3428 pu_ptr->mv[REF_LIST_1].mv_union;
3429
3430 // Update Neighbor Arrays (Mode Type, mvs, SKIP)
3431 {
3432 uint8_t skip_flag = (uint8_t)blk_ptr->skip_flag;
3433 encode_pass_update_inter_mode_neighbor_arrays(
3434 ep_mode_type_neighbor_array,
3435 ep_mv_neighbor_array,
3436 ep_skip_flag_neighbor_array,
3437 &context_ptr->mv_unit,
3438 &skip_flag,
3439 context_ptr->blk_origin_x,
3440 context_ptr->blk_origin_y,
3441 blk_geom->bwidth,
3442 blk_geom->bheight);
3443 }
3444 } // 2nd Partition Loop
3445
3446 // Update Recon Samples Neighbor Arrays -INTER-
3447 encode_pass_update_recon_sample_neighbour_arrays(
3448 ep_luma_recon_neighbor_array,
3449 ep_cb_recon_neighbor_array,
3450 ep_cr_recon_neighbor_array,
3451 recon_buffer,
3452 context_ptr->blk_origin_x,
3453 context_ptr->blk_origin_y,
3454 context_ptr->blk_geom->bwidth,
3455 context_ptr->blk_geom->bheight,
3456 context_ptr->blk_geom->bwidth_uv,
3457 context_ptr->blk_geom->bheight_uv,
3458 context_ptr->blk_geom->has_uv ? PICTURE_BUFFER_DESC_FULL_MASK
3459 : PICTURE_BUFFER_DESC_LUMA_MASK,
3460 is_16bit);
3461
3462 } else {
3463 CHECK_REPORT_ERROR_NC(encode_context_ptr->app_callback_ptr, EB_ENC_CL_ERROR2);
3464 }
3465 if (pcs_ptr->parent_pcs_ptr->frm_hdr.allow_intrabc && is_16bit && (context_ptr->bit_depth == EB_8BIT)) {
3466 EbPictureBufferDesc *recon_buffer_16bit;
3467 EbPictureBufferDesc *recon_buffer_8bit;
3468 if (pcs_ptr->parent_pcs_ptr->is_used_as_reference_flag == EB_TRUE)
3469 //get the 16bit form of the input SB
3470 recon_buffer_16bit = ((EbReferenceObject *)pcs_ptr->parent_pcs_ptr
3471 ->reference_picture_wrapper_ptr->object_ptr)
3472 ->reference_picture16bit;
3473 else // non ref pictures
3474 recon_buffer_16bit = pcs_ptr->parent_pcs_ptr->enc_dec_ptr->recon_picture16bit_ptr;
3475
3476 if (pcs_ptr->parent_pcs_ptr->is_used_as_reference_flag == EB_TRUE)
3477 //get the 16bit form of the input SB
3478 recon_buffer_8bit = ((EbReferenceObject *)pcs_ptr->parent_pcs_ptr
3479 ->reference_picture_wrapper_ptr->object_ptr)
3480 ->reference_picture;
3481 else // non ref pictures
3482 recon_buffer_8bit = pcs_ptr->parent_pcs_ptr->enc_dec_ptr->recon_picture_ptr;
3483
3484 uint32_t pred_buf_x_offest = context_ptr->blk_origin_x;
3485 uint32_t pred_buf_y_offest = context_ptr->blk_origin_y;
3486
3487 uint16_t *dst_16bit = (uint16_t *)(recon_buffer_16bit->buffer_y) +
3488 pred_buf_x_offest + recon_buffer_16bit->origin_x +
3489 (pred_buf_y_offest + recon_buffer_16bit->origin_y) *
3490 recon_buffer_16bit->stride_y;
3491 int32_t dst_stride_16bit = recon_buffer_16bit->stride_y;
3492
3493 uint8_t *dst;
3494 int32_t dst_stride;
3495
3496 dst = recon_buffer_8bit->buffer_y + pred_buf_x_offest + recon_buffer_8bit->origin_x +
3497 (pred_buf_y_offest + recon_buffer_8bit->origin_y) * recon_buffer_8bit->stride_y;
3498 dst_stride = recon_buffer_8bit->stride_y;
3499
3500 svt_convert_16bit_to_8bit(dst_16bit,
3501 dst_stride_16bit,
3502 dst,
3503 dst_stride,
3504 context_ptr->blk_geom->bwidth,
3505 context_ptr->blk_geom->bheight);
3506
3507 //copy recon from 16bit to 8bit
3508 pred_buf_x_offest = ((context_ptr->blk_origin_x >> 3) << 3) >> 1;
3509 pred_buf_y_offest = ((context_ptr->blk_origin_y >> 3) << 3) >> 1;
3510
3511 dst_16bit = (uint16_t *)(recon_buffer_16bit->buffer_cb) +
3512 pred_buf_x_offest + recon_buffer_16bit->origin_x / 2 +
3513 (pred_buf_y_offest + recon_buffer_16bit->origin_y / 2) *
3514 recon_buffer_16bit->stride_cb;
3515 dst_stride_16bit = recon_buffer_16bit->stride_cb;
3516
3517 dst = recon_buffer_8bit->buffer_cb + pred_buf_x_offest +
3518 recon_buffer_8bit->origin_x / 2 +
3519 (pred_buf_y_offest + recon_buffer_8bit->origin_y / 2) *
3520 recon_buffer_8bit->stride_cb;
3521 dst_stride = recon_buffer_8bit->stride_cb;
3522
3523
3524 svt_convert_16bit_to_8bit(dst_16bit,
3525 dst_stride_16bit,
3526 dst,
3527 dst_stride,
3528 context_ptr->blk_geom->bwidth_uv,
3529 context_ptr->blk_geom->bheight_uv);
3530
3531 dst_16bit = (uint16_t *)(recon_buffer_16bit->buffer_cr) +
3532 (pred_buf_x_offest + recon_buffer_16bit->origin_x / 2 +
3533 (pred_buf_y_offest + recon_buffer_16bit->origin_y / 2) *
3534 recon_buffer_16bit->stride_cr);
3535 dst_stride_16bit = recon_buffer_16bit->stride_cr;
3536 dst = recon_buffer_8bit->buffer_cr + pred_buf_x_offest +
3537 recon_buffer_8bit->origin_x / 2 +
3538 (pred_buf_y_offest + recon_buffer_8bit->origin_y / 2) *
3539 recon_buffer_8bit->stride_cr;
3540 dst_stride = recon_buffer_8bit->stride_cr;
3541
3542
3543 svt_convert_16bit_to_8bit(dst_16bit,
3544 dst_stride_16bit,
3545 dst,
3546 dst_stride,
3547 context_ptr->blk_geom->bwidth_uv,
3548 context_ptr->blk_geom->bheight_uv);
3549 }
3550 update_mi_map_skip_settings(blk_ptr);
3551 if (pcs_ptr->cdf_ctrl.update_se) {
3552 // Update the partition Neighbor Array
3553 PartitionContext partition;
3554 partition.above = partition_context_lookup[blk_geom->bsize].above;
3555 partition.left = partition_context_lookup[blk_geom->bsize].left;
3556
3557 neighbor_array_unit_mode_write(pcs_ptr->ep_partition_context_neighbor_array[tile_idx],
3558 (uint8_t *)&partition,
3559 context_ptr->blk_origin_x,
3560 context_ptr->blk_origin_y,
3561 blk_geom->bwidth,
3562 blk_geom->bheight,
3563 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
3564
3565 // Update the CDFs based on the current block
3566 blk_ptr->av1xd->tile_ctx = &pcs_ptr->ec_ctx_array[sb_addr];
3567 update_stats(pcs_ptr,
3568 blk_ptr,
3569 context_ptr->blk_origin_y >> MI_SIZE_LOG2,
3570 context_ptr->blk_origin_x >> MI_SIZE_LOG2);
3571 }
3572
3573 if (dlf_enable_flag) {}
3574
3575 {{// Set the PU Loop Variables
3576 pu_ptr = blk_ptr->prediction_unit_array;
3577 // Set MvUnit
3578 context_ptr->mv_unit.pred_direction = (uint8_t)pu_ptr->inter_pred_direction_index;
3579 context_ptr->mv_unit.mv[REF_LIST_0].mv_union = pu_ptr->mv[REF_LIST_0].mv_union;
3580 context_ptr->mv_unit.mv[REF_LIST_1].mv_union = pu_ptr->mv[REF_LIST_1].mv_union;
3581 }
3582 }
3583
3584 {
3585 sb_ptr->final_blk_arr[final_blk_itr].av1xd = sb_ptr->av1xd;
3586 BlkStruct *src_cu = &context_ptr->md_context->md_blk_arr_nsq[d1_itr];
3587 BlkStruct *dst_cu = &sb_ptr->final_blk_arr[final_blk_itr++];
3588 move_blk_data(pcs_ptr, context_ptr, src_cu, dst_cu);
3589 }
3590 if (scs_ptr->mfmv_enabled && pcs_ptr->slice_type != I_SLICE &&
3591 pcs_ptr->parent_pcs_ptr->is_used_as_reference_flag) {
3592 uint32_t mi_stride = pcs_ptr->mi_stride;
3593 int32_t mi_row = context_ptr->blk_origin_y >> MI_SIZE_LOG2;
3594 int32_t mi_col = context_ptr->blk_origin_x >> MI_SIZE_LOG2;
3595 const int32_t offset = mi_row * mi_stride + mi_col;
3596 ModeInfo * mi_ptr = *(pcs_ptr->mi_grid_base + offset);
3597 const int x_mis = AOMMIN(context_ptr->blk_geom->bwidth >> MI_SIZE_LOG2,
3598 pcs_ptr->parent_pcs_ptr->av1_cm->mi_cols - mi_col);
3599 const int y_mis = AOMMIN(context_ptr->blk_geom->bheight >> MI_SIZE_LOG2,
3600 pcs_ptr->parent_pcs_ptr->av1_cm->mi_rows - mi_row);
3601 EbReferenceObject *obj_l0 =
3602 (EbReferenceObject *)
3603 pcs_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr;
3604
3605 av1_copy_frame_mvs(pcs_ptr,
3606 pcs_ptr->parent_pcs_ptr->av1_cm,
3607 mi_ptr->mbmi,
3608 mi_row,
3609 mi_col,
3610 x_mis,
3611 y_mis,
3612 obj_l0);
3613 }
3614 }
3615 blk_it +=
3616 ns_depth_offset[scs_ptr->seq_header.sb_size == BLOCK_128X128][context_ptr->blk_geom->depth];
3617 }
3618 else blk_it +=
3619 d1_depth_offset[scs_ptr->seq_header.sb_size == BLOCK_128X128][context_ptr->blk_geom->depth];
3620 } // CU Loop
3621 // First Pass Deblocking
3622 if (dlf_enable_flag && pcs_ptr->parent_pcs_ptr->loop_filter_mode == 1 && total_tile_cnt == 1) {
3623 //Jing: Don't work for tile_parallel since the SB of bottom tile comes early than the bottom SB of top tile
3624 if (pcs_ptr->parent_pcs_ptr->frm_hdr.loop_filter_params.filter_level[0] ||
3625 pcs_ptr->parent_pcs_ptr->frm_hdr.loop_filter_params.filter_level[1]) {
3626 uint8_t last_col =
3627 ((sb_origin_x) + sb_width == pcs_ptr->parent_pcs_ptr->aligned_width) ? 1 : 0;
3628 loop_filter_sb(recon_buffer, pcs_ptr, sb_origin_y >> 2, sb_origin_x >> 2, 0, 3, last_col);
3629 }
3630 }
3631
3632 return;
3633 }
3634
3635 #if NO_ENCDEC
no_enc_dec_pass(SequenceControlSet * scs_ptr,PictureControlSet * pcs_ptr,SuperBlock * sb_ptr,uint32_t sb_addr,uint32_t sb_origin_x,uint32_t sb_origin_y,uint32_t sb_qp,EncDecContext * context_ptr)3636 EB_EXTERN void no_enc_dec_pass(SequenceControlSet *scs_ptr, PictureControlSet *pcs_ptr,
3637 SuperBlock *sb_ptr, uint32_t sb_addr, uint32_t sb_origin_x,
3638 uint32_t sb_origin_y, uint32_t sb_qp, EncDecContext *context_ptr) {
3639 context_ptr->coded_area_sb = 0;
3640 context_ptr->coded_area_sb_uv = 0;
3641
3642 uint32_t final_blk_itr = 0;
3643
3644 uint32_t blk_it = 0;
3645
3646 while (blk_it < scs_ptr->max_block_cnt) {
3647 BlkStruct *blk_ptr = context_ptr->blk_ptr =
3648 &context_ptr->md_context->md_blk_arr_nsq[blk_it];
3649 PartitionType part = blk_ptr->part;
3650 const BlockGeom *blk_geom = context_ptr->blk_geom = get_blk_geom_mds(blk_it);
3651
3652 sb_ptr->cu_partition_array[blk_it] = context_ptr->md_context->md_blk_arr_nsq[blk_it].part;
3653
3654 if (part != PARTITION_SPLIT) {
3655 int32_t offset_d1 = ns_blk_offset[(int32_t)part]; //blk_ptr->best_d1_blk; // TOCKECK
3656 int32_t num_d1_block =
3657 ns_blk_num[(int32_t)part]; // context_ptr->blk_geom->totns; // TOCKECK
3658
3659 for (int32_t d1_itr = blk_it + offset_d1; d1_itr < blk_it + offset_d1 + num_d1_block;
3660 d1_itr++) {
3661 const BlockGeom *blk_geom = context_ptr->blk_geom = get_blk_geom_mds(d1_itr);
3662 BlkStruct * blk_ptr = context_ptr->blk_ptr =
3663 &context_ptr->md_context->md_blk_arr_nsq[d1_itr];
3664
3665 blk_ptr->delta_qp = 0;
3666 blk_ptr->qp = pcs_ptr->picture_qp;
3667 sb_ptr->qp = pcs_ptr->picture_qp;
3668
3669 {
3670 BlkStruct *src_cu = &context_ptr->md_context->md_blk_arr_nsq[d1_itr];
3671 BlkStruct *dst_cu = &sb_ptr->final_blk_arr[final_blk_itr++];
3672
3673 move_blk_data(src_cu, dst_cu);
3674 }
3675
3676 //copy coeff
3677 int32_t txb_1d_offset = 0, txb_1d_offset_uv = 0;
3678
3679 int32_t txb_itr = 0;
3680 do {
3681 uint32_t bwidth = context_ptr->blk_geom->tx_width[txb_itr] < 64
3682 ? context_ptr->blk_geom->tx_width[txb_itr]
3683 : 32;
3684 uint32_t bheight = context_ptr->blk_geom->tx_height[txb_itr] < 64
3685 ? context_ptr->blk_geom->tx_height[txb_itr]
3686 : 32;
3687
3688 int32_t *src_ptr =
3689 &(((int32_t *)context_ptr->blk_ptr->coeff_tmp->buffer_y)[txb_1d_offset]);
3690 int32_t *dst_ptr = &(
3691 ((int32_t *)sb_ptr->quantized_coeff->buffer_y)[context_ptr->coded_area_sb]);
3692
3693 uint32_t j;
3694 for (j = 0; j < bheight; j++)
3695 svt_memcpy(
3696 dst_ptr + j * bwidth, src_ptr + j * bwidth, bwidth * sizeof(int32_t));
3697 if (context_ptr->blk_geom->has_uv) {
3698 // Cb
3699 bwidth = context_ptr->blk_geom->tx_width_uv[txb_itr];
3700 bheight = context_ptr->blk_geom->tx_height_uv[txb_itr];
3701
3702 src_ptr =
3703 &(((int32_t *)
3704 context_ptr->blk_ptr->coeff_tmp->buffer_cb)[txb_1d_offset_uv]);
3705 dst_ptr = &(((int32_t *)sb_ptr->quantized_coeff
3706 ->buffer_cb)[context_ptr->coded_area_sb_uv]);
3707
3708 for (j = 0; j < bheight; j++)
3709 svt_memcpy(dst_ptr + j * bwidth,
3710 src_ptr + j * bwidth,
3711 bwidth * sizeof(int32_t));
3712 //Cr
3713 src_ptr =
3714 &(((int32_t *)
3715 context_ptr->blk_ptr->coeff_tmp->buffer_cr)[txb_1d_offset_uv]);
3716 dst_ptr = &(((int32_t *)sb_ptr->quantized_coeff
3717 ->buffer_cr)[context_ptr->coded_area_sb_uv]);
3718
3719 for (j = 0; j < bheight; j++)
3720 svt_memcpy(dst_ptr + j * bwidth,
3721 src_ptr + j * bwidth,
3722 bwidth * sizeof(int32_t));
3723 }
3724
3725 context_ptr->coded_area_sb += context_ptr->blk_geom->tx_width[txb_itr] *
3726 context_ptr->blk_geom->tx_height[txb_itr];
3727 if (context_ptr->blk_geom->has_uv)
3728 context_ptr->coded_area_sb_uv +=
3729 context_ptr->blk_geom->tx_width_uv[txb_itr] *
3730 context_ptr->blk_geom->tx_height_uv[txb_itr];
3731
3732 txb_1d_offset += context_ptr->blk_geom->tx_width[txb_itr] *
3733 context_ptr->blk_geom->tx_height[txb_itr];
3734 if (context_ptr->blk_geom->has_uv)
3735 txb_1d_offset_uv += context_ptr->blk_geom->tx_width_uv[txb_itr] *
3736 context_ptr->blk_geom->tx_height_uv[txb_itr];
3737
3738 txb_itr++;
3739 } while (txb_itr < context_ptr->blk_geom->txb_count);
3740
3741 //copy recon
3742 {
3743 EbPictureBufferDesc *ref_pic;
3744 if (pcs_ptr->parent_pcs_ptr->is_used_as_reference_flag) {
3745 EbReferenceObject *ref_obj =
3746 (EbReferenceObject *)
3747 pcs_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr;
3748 ref_pic = ref_obj->reference_picture;
3749 } else
3750 ref_pic = pcs_ptr->recon_picture_ptr;
3751 context_ptr->blk_origin_x = sb_origin_x + context_ptr->blk_geom->origin_x;
3752 context_ptr->blk_origin_y = sb_origin_y + context_ptr->blk_geom->origin_y;
3753
3754 uint32_t bwidth = context_ptr->blk_geom->bwidth;
3755 uint32_t bheight = context_ptr->blk_geom->bheight;
3756
3757 uint8_t *src_ptr = &(((uint8_t *)context_ptr->blk_ptr->recon_tmp->buffer_y)[0]);
3758 uint8_t *dst_ptr =
3759 ref_pic->buffer_y + ref_pic->origin_x + context_ptr->blk_origin_x +
3760 (ref_pic->origin_y + context_ptr->blk_origin_y) * ref_pic->stride_y;
3761
3762 uint32_t j;
3763 for (j = 0; j < bheight; j++)
3764 svt_memcpy(dst_ptr + j * ref_pic->stride_y,
3765 src_ptr + j * 128,
3766 bwidth * sizeof(uint8_t));
3767 if (context_ptr->blk_geom->has_uv) {
3768 bwidth = context_ptr->blk_geom->bwidth_uv;
3769 bheight = context_ptr->blk_geom->bheight_uv;
3770
3771 src_ptr = &(((uint8_t *)context_ptr->blk_ptr->recon_tmp->buffer_cb)[0]);
3772
3773 dst_ptr =
3774 ref_pic->buffer_cb + ref_pic->origin_x / 2 +
3775 ((context_ptr->blk_origin_x >> 3) << 3) / 2 +
3776 (ref_pic->origin_y / 2 + ((context_ptr->blk_origin_y >> 3) << 3) / 2) *
3777 ref_pic->stride_cb;
3778
3779 for (j = 0; j < bheight; j++)
3780 svt_memcpy(dst_ptr + j * ref_pic->stride_cb,
3781 src_ptr + j * 64,
3782 bwidth * sizeof(uint8_t));
3783 src_ptr = &(((uint8_t *)context_ptr->blk_ptr->recon_tmp->buffer_cr)[0]);
3784
3785 dst_ptr =
3786 ref_pic->buffer_cr + ref_pic->origin_x / 2 +
3787 ((context_ptr->blk_origin_x >> 3) << 3) / 2 +
3788 (ref_pic->origin_y / 2 + ((context_ptr->blk_origin_y >> 3) << 3) / 2) *
3789 ref_pic->stride_cr;
3790
3791 for (j = 0; j < bheight; j++)
3792 svt_memcpy(dst_ptr + j * ref_pic->stride_cr,
3793 src_ptr + j * 64,
3794 bwidth * sizeof(uint8_t));
3795 }
3796 }
3797 }
3798 blk_it +=
3799 ns_depth_offset[scs_ptr->sb_size == BLOCK_128X128][context_ptr->blk_geom->depth];
3800 } else
3801 blk_it +=
3802 d1_depth_offset[scs_ptr->sb_size == BLOCK_128X128][context_ptr->blk_geom->depth];
3803 } // CU Loop
3804
3805 return;
3806 }
3807 #endif
3808