1 /*
2 * Copyright(c) 2019 Intel Corporation
3 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
4 *
5 * This source code is subject to the terms of the BSD 2 Clause License and
6 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
7 * was not distributed with this source code in the LICENSE file, you can
8 * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
9 * Media Patent License 1.0 was not distributed with this source code in the
10 * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
11 */
12 #include <string.h>
13 
14 #include "EbCodingLoop.h"
15 #include "EbUtility.h"
16 #include "EbTransformUnit.h"
17 #include "EbRateDistortionCost.h"
18 #include "EbDeblockingFilter.h"
19 #include "EbPictureOperators.h"
20 #include "EbSegmentation.h"
21 #include "EbEncDecProcess.h"
22 #include "EbSvtAv1ErrorCodes.h"
23 #include "EbTransforms.h"
24 #include "EbInvTransforms.h"
25 #include "EbModeDecisionConfigurationProcess.h"
26 #include "EbEncIntraPrediction.h"
27 #include "aom_dsp_rtcd.h"
28 #include "EbMdRateEstimation.h"
29 #include "EbFullLoop.h"
30 void av1_set_ref_frame(MvReferenceFrame *rf, int8_t ref_frame_type);
31 uint8_t av1_drl_ctx(const CandidateMv *ref_mv_stack, int32_t ref_idx);
32 
33 /*******************************************
34 * set Penalize Skip Flag
35 *
* Summary: Set the penalize_skipflag to true
* when there is a luminance/chrominance change,
* or in a noisy clip with low motion in a
* medium-variance area
40 *
41 *******************************************/
42 
/* Squares of the sizes 32, 16, 8 and 4.
 * Each replacement list is parenthesized so the macro acts as a single operand
 * when it appears inside a larger expression (e.g. `x / S32` or `x % S8`);
 * without the parentheses `x / S32` would expand to `x / 32 * 32`. */
#define S32 (32 * 32)
#define S16 (16 * 16)
#define S8 (8 * 8)
#define S4 (4 * 4)
47 
/* Function-pointer type for an encode-loop routine. Its parameter list matches
 * av1_encode_loop() defined later in this file: picture/context/superblock
 * handles, the picture-based transform origin, the prediction, coefficient,
 * residual, transform and inverse-quantization buffers, and the per-plane
 * outputs. av1_encode_loop() writes entries 0..2 of count_non_zero_coeffs and
 * eob; component_mask selects which planes are processed. */
typedef void (*EbAv1EncodeLoopFuncPtr)(PictureControlSet *pcs_ptr, EncDecContext *context_ptr,
                                       SuperBlock *sb_ptr, uint32_t origin_x, uint32_t origin_y,
                                       EbPictureBufferDesc *pred_samples, // no basis/offset
                                       EbPictureBufferDesc *coeff_samples_sb, // sb based
                                       EbPictureBufferDesc *residual16bit, // no basis/offset
                                       EbPictureBufferDesc *transform16bit, // no basis/offset
                                       EbPictureBufferDesc *inverse_quant_buffer,
                                       uint32_t *count_non_zero_coeffs, uint32_t component_mask,
                                       uint16_t *eob);
57 
58 
/* Function-pointer type for a routine taking the enc/dec context, an origin,
 * prediction and residual buffers, a component mask and an eob array.
 * NOTE(review): no function with this signature is visible in this part of the
 * file; presumably it is the reconstruction counterpart of
 * EbAv1EncodeLoopFuncPtr -- confirm against its users. */
typedef void (*EbAv1GenerateReconFuncPtr)(EncDecContext *context_ptr, uint32_t origin_x,
                                          uint32_t             origin_y,
                                          EbPictureBufferDesc *pred_samples, // no basis/offset
                                          EbPictureBufferDesc *residual16bit, // no basis/offset
                                          uint32_t component_mask, uint16_t *eob);
64 
65 /*******************************************
66 * Residual Kernel 8-16bit
67     Computes the residual data
68 *******************************************/
/*
 * Dispatches the residual computation to the 8-bit or the 16-bit kernel.
 *
 *   input / pred      source and prediction buffers; reinterpreted as
 *                     uint16_t buffers when hbd is set
 *   *_offset          element offsets into the respective buffers (applied
 *                     after the uint16_t cast in the hbd case)
 *   *_stride          strides forwarded unchanged to the kernel
 *   residual          int16_t destination; residual_offset is added to it
 *   hbd               non-zero selects svt_residual_kernel16bit, zero selects
 *                     svt_residual_kernel8bit
 *   area_width/height size of the processed area, forwarded unchanged
 */
void residual_kernel(uint8_t *input, uint32_t input_offset, uint32_t input_stride, uint8_t *pred,
                     uint32_t pred_offset, uint32_t pred_stride, int16_t *residual,
                     uint32_t residual_offset, uint32_t residual_stride, EbBool hbd,
                     uint32_t area_width, uint32_t area_height) {
    int16_t *const dst = residual + residual_offset;

    if (!hbd) {
        // 8-bit path: offsets index the byte buffers directly.
        svt_residual_kernel8bit(input + input_offset,
                                input_stride,
                                pred + pred_offset,
                                pred_stride,
                                dst,
                                residual_stride,
                                area_width,
                                area_height);
        return;
    }

    // 16-bit path: cast first so the offsets count uint16_t samples, not bytes.
    uint16_t *const src16  = (uint16_t *)input + input_offset;
    uint16_t *const pred16 = (uint16_t *)pred + pred_offset;
    svt_residual_kernel16bit(
        src16, input_stride, pred16, pred_stride, dst, residual_stride, area_width, area_height);
}
93 
94 /***************************************************
95 * Update Intra Mode Neighbor Arrays
96 ***************************************************/
/*
 * Records an intra-coded block in the mode neighbor arrays.
 *
 * When component_mask contains PICTURE_BUFFER_DESC_LUMA_MASK:
 *   - INTRA_MODE is written to mode_type_neighbor_array (full mask);
 *   - luma_mode is written to intra_luma_mode_neighbor_array (top/left only);
 *   both at (origin_x, origin_y) over width x height.
 * When component_mask contains PICTURE_BUFFER_DESC_CHROMA_MASK:
 *   - chroma_mode is written to intra_chroma_mode_neighbor_array (top/left
 *     only) over width_uv x height_uv, at the origin rounded down to a
 *     multiple of 8 and then halved.
 */
static void encode_pass_update_intra_mode_neighbor_arrays(
    NeighborArrayUnit *mode_type_neighbor_array, NeighborArrayUnit *intra_luma_mode_neighbor_array,
    NeighborArrayUnit *intra_chroma_mode_neighbor_array, uint8_t luma_mode, uint8_t chroma_mode,
    uint32_t origin_x, uint32_t origin_y, uint32_t width, uint32_t height, uint32_t width_uv,
    uint32_t height_uv, uint32_t component_mask) {
    const int update_luma   = (component_mask & PICTURE_BUFFER_DESC_LUMA_MASK) != 0;
    const int update_chroma = (component_mask & PICTURE_BUFFER_DESC_CHROMA_MASK) != 0;

    if (update_luma) {
        uint8_t block_type = INTRA_MODE;

        // Block type (intra)
        neighbor_array_unit_mode_write(mode_type_neighbor_array, &block_type, origin_x, origin_y,
                                       width, height, NEIGHBOR_ARRAY_UNIT_FULL_MASK);

        // Intra luma prediction mode
        neighbor_array_unit_mode_write(intra_luma_mode_neighbor_array, &luma_mode, origin_x,
                                       origin_y, width, height,
                                       NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
    }

    if (update_chroma) {
        // Align the luma origin down to 8 before halving to get the chroma position.
        const uint32_t chroma_x = ((origin_x >> 3) << 3) / 2;
        const uint32_t chroma_y = ((origin_y >> 3) << 3) / 2;

        // Intra chroma prediction mode
        neighbor_array_unit_mode_write(intra_chroma_mode_neighbor_array, &chroma_mode, chroma_x,
                                       chroma_y, width_uv, height_uv,
                                       NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
    }
}
136 
137 /***************************************************
138 * Update Inter Mode Neighbor Arrays
139 ***************************************************/
/*
 * Records an inter-coded block in the neighbor arrays. Three writes are made,
 * all at (origin_x, origin_y) over bwidth x bheight:
 *   1. INTER_MODE into mode_type_neighbor_array (full mask);
 *   2. the bytes of *mv_unit into mv_neighbor_array (full mask);
 *   3. *skip_flag into skipNeighborArray (top/left only).
 */
static void encode_pass_update_inter_mode_neighbor_arrays(
    NeighborArrayUnit *mode_type_neighbor_array, NeighborArrayUnit *mv_neighbor_array,
    NeighborArrayUnit *skipNeighborArray, MvUnit *mv_unit, uint8_t *skip_flag, uint32_t origin_x,
    uint32_t origin_y, uint32_t bwidth, uint32_t bheight) {
    uint8_t  block_type = INTER_MODE;
    uint8_t *mv_bytes   = (uint8_t *)mv_unit; // the MV unit is handed over as raw bytes

    // 1. Block type (inter)
    neighbor_array_unit_mode_write(mode_type_neighbor_array, &block_type, origin_x, origin_y,
                                   bwidth, bheight, NEIGHBOR_ARRAY_UNIT_FULL_MASK);

    // 2. Motion vector unit
    neighbor_array_unit_mode_write(mv_neighbor_array, mv_bytes, origin_x, origin_y, bwidth,
                                   bheight, NEIGHBOR_ARRAY_UNIT_FULL_MASK);

    // 3. Skip flag
    neighbor_array_unit_mode_write(skipNeighborArray, skip_flag, origin_x, origin_y, bwidth,
                                   bheight, NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
}
175 
176 /***************************************************
177 * Update Recon Samples Neighbor Arrays
178 ***************************************************/
/*
 * Copies reconstructed samples of a block from recon_buffer into the luma,
 * Cb and Cr recon-sample neighbor arrays (full mask for every write).
 *
 *   component_mask  PICTURE_BUFFER_DESC_LUMA_MASK enables the luma write,
 *                   PICTURE_BUFFER_DESC_CHROMA_MASK enables the Cb and Cr writes
 *   origin_x/y      block origin; the luma write uses it as is, the chroma
 *                   writes use it rounded down to a multiple of 8 and halved
 *   width/height    luma block size; bwidth_uv/bheight_uv: chroma block size
 *   is_16bit        when equal to EB_TRUE the buffers are treated as uint16_t
 *                   and the 16-bit writer is used; otherwise the 8-bit writer
 *
 * The picture-buffer position passed to the writers additionally includes
 * recon_buffer->origin_x / origin_y.
 */
static void encode_pass_update_recon_sample_neighbour_arrays(
    NeighborArrayUnit *lumaReconSampleNeighborArray, NeighborArrayUnit *cbReconSampleNeighborArray,
    NeighborArrayUnit *crReconSampleNeighborArray, EbPictureBufferDesc *recon_buffer,
    uint32_t origin_x, uint32_t origin_y, uint32_t width, uint32_t height, uint32_t bwidth_uv,
    uint32_t bheight_uv, uint32_t component_mask, EbBool is_16bit) {
    const int use_16bit = (is_16bit == EB_TRUE);

    // ---- Luma ----
    if (component_mask & PICTURE_BUFFER_DESC_LUMA_MASK) {
        const uint32_t pic_x = recon_buffer->origin_x + origin_x;
        const uint32_t pic_y = recon_buffer->origin_y + origin_y;

        if (use_16bit) {
            neighbor_array_unit16bit_sample_write(lumaReconSampleNeighborArray,
                                                  (uint16_t *)(recon_buffer->buffer_y),
                                                  recon_buffer->stride_y, pic_x, pic_y, origin_x,
                                                  origin_y, width, height,
                                                  NEIGHBOR_ARRAY_UNIT_FULL_MASK);
        } else {
            neighbor_array_unit_sample_write(lumaReconSampleNeighborArray, recon_buffer->buffer_y,
                                             recon_buffer->stride_y, pic_x, pic_y, origin_x,
                                             origin_y, width, height,
                                             NEIGHBOR_ARRAY_UNIT_FULL_MASK);
        }
    }

    // ---- Chroma (Cb first, then Cr) ----
    if (component_mask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
        // Align the origin down to 8 (chroma blocks of size 4), then halve for chroma.
        const uint32_t aligned_x = (origin_x >> 3) << 3;
        const uint32_t aligned_y = (origin_y >> 3) << 3;
        const uint32_t pic_x_uv  = (recon_buffer->origin_x + aligned_x) >> 1;
        const uint32_t pic_y_uv  = (recon_buffer->origin_y + aligned_y) >> 1;
        const uint32_t blk_x_uv  = aligned_x >> 1;
        const uint32_t blk_y_uv  = aligned_y >> 1;

        if (use_16bit) {
            neighbor_array_unit16bit_sample_write(cbReconSampleNeighborArray,
                                                  (uint16_t *)(recon_buffer->buffer_cb),
                                                  recon_buffer->stride_cb, pic_x_uv, pic_y_uv,
                                                  blk_x_uv, blk_y_uv, bwidth_uv, bheight_uv,
                                                  NEIGHBOR_ARRAY_UNIT_FULL_MASK);

            neighbor_array_unit16bit_sample_write(crReconSampleNeighborArray,
                                                  (uint16_t *)(recon_buffer->buffer_cr),
                                                  recon_buffer->stride_cr, pic_x_uv, pic_y_uv,
                                                  blk_x_uv, blk_y_uv, bwidth_uv, bheight_uv,
                                                  NEIGHBOR_ARRAY_UNIT_FULL_MASK);
        } else {
            neighbor_array_unit_sample_write(cbReconSampleNeighborArray, recon_buffer->buffer_cb,
                                             recon_buffer->stride_cb, pic_x_uv, pic_y_uv,
                                             blk_x_uv, blk_y_uv, bwidth_uv, bheight_uv,
                                             NEIGHBOR_ARRAY_UNIT_FULL_MASK);

            neighbor_array_unit_sample_write(crReconSampleNeighborArray, recon_buffer->buffer_cr,
                                             recon_buffer->stride_cr, pic_x_uv, pic_y_uv,
                                             blk_x_uv, blk_y_uv, bwidth_uv, bheight_uv,
                                             NEIGHBOR_ARRAY_UNIT_FULL_MASK);
        }
    }
}
271 
272 /**********************************************************
273 * Encode Loop
274 *
275 * Summary: Performs an AV1 conformant
276 *   Transform, Quantization  and Inverse Quantization of a TU.
277 *
278 * Inputs:
279 *   origin_x
280 *   origin_y
281 *   txb_size
282 *   sb_sz
283 *   input - input samples (position sensitive)
284 *   pred - prediction samples (position independent)
285 *
286 * Outputs:
287 *   Inverse quantized coeff - quantization indices (position sensitive)
288 *
289 **********************************************************/
av1_encode_loop(PictureControlSet * pcs_ptr,EncDecContext * context_ptr,SuperBlock * sb_ptr,uint32_t origin_x,uint32_t origin_y,EbPictureBufferDesc * pred_samples,EbPictureBufferDesc * coeff_samples_sb,EbPictureBufferDesc * residual16bit,EbPictureBufferDesc * transform16bit,EbPictureBufferDesc * inverse_quant_buffer,uint32_t * count_non_zero_coeffs,uint32_t component_mask,uint16_t * eob)290 static void av1_encode_loop(PictureControlSet *pcs_ptr, EncDecContext *context_ptr,
291                             SuperBlock *         sb_ptr,
292                             uint32_t             origin_x, //pic based tx org x
293                             uint32_t             origin_y, //pic based tx org y
294                             EbPictureBufferDesc *pred_samples, // no basis/offset
295                             EbPictureBufferDesc *coeff_samples_sb, // sb based
296                             EbPictureBufferDesc *residual16bit, // no basis/offset
297                             EbPictureBufferDesc *transform16bit, // no basis/offset
298                             EbPictureBufferDesc *inverse_quant_buffer,
299                             uint32_t *count_non_zero_coeffs,
300                             uint32_t component_mask, uint16_t *eob) {
301 
302     //    uint32_t                 chroma_qp = cb_qp;
303     BlkStruct *   blk_ptr = context_ptr->blk_ptr;
304     TransformUnit *txb_ptr = &blk_ptr->txb_array[context_ptr->txb_itr];
305     //    EB_SLICE               slice_type = sb_ptr->pcs_ptr->slice_type;
306     //    uint32_t                 temporal_layer_index = sb_ptr->pcs_ptr->temporal_layer_index;
307     uint32_t             qindex        = blk_ptr->qindex;
308     EbPictureBufferDesc *input_samples = context_ptr->input_samples;
309 
310     uint32_t round_origin_x = (origin_x >> 3) << 3; // for Chroma blocks with size of 4
311     uint32_t round_origin_y = (origin_y >> 3) << 3; // for Chroma blocks with size of 4
312 
313     const uint32_t input_luma_offset =
314         ((origin_y + input_samples->origin_y) * input_samples->stride_y) +
315         (origin_x + input_samples->origin_x);
316     const uint32_t input_cb_offset =
317         (((round_origin_y + input_samples->origin_y) >> 1) * input_samples->stride_cb) +
318         ((round_origin_x + input_samples->origin_x) >> 1);
319     const uint32_t input_cr_offset =
320         (((round_origin_y + input_samples->origin_y) >> 1) * input_samples->stride_cr) +
321         ((round_origin_x + input_samples->origin_x) >> 1);
322     const uint32_t pred_luma_offset =
323         ((pred_samples->origin_y + origin_y) * pred_samples->stride_y) +
324         (pred_samples->origin_x + origin_x);
325     const uint32_t pred_cb_offset =
326         (((pred_samples->origin_y + round_origin_y) >> 1) * pred_samples->stride_cb) +
327         ((pred_samples->origin_x + round_origin_x) >> 1);
328     const uint32_t pred_cr_offset =
329         (((pred_samples->origin_y + round_origin_y) >> 1) * pred_samples->stride_cr) +
330         ((pred_samples->origin_x + round_origin_x) >> 1);
331     int32_t is_inter = (blk_ptr->prediction_mode_flag == INTER_MODE || blk_ptr->use_intrabc)
332                            ? EB_TRUE
333                            : EB_FALSE;
334     const uint32_t scratch_luma_offset =
335         context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] +
336         context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] *
337             SB_STRIDE_Y;
338     const uint32_t scratch_cb_offset =
339         ROUND_UV(
340             context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
341             2 +
342         ROUND_UV(
343             context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
344             2 * SB_STRIDE_UV;
345     const uint32_t scratch_cr_offset =
346         ROUND_UV(
347             context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
348             2 +
349         ROUND_UV(
350             context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
351             2 * SB_STRIDE_UV;
352     const uint32_t coeff1d_offset = context_ptr->coded_area_sb;
353 
354     const uint32_t coeff1d_offset_chroma = context_ptr->coded_area_sb_uv;
355     UNUSED(coeff1d_offset_chroma);
356 
357     context_ptr->three_quad_energy = 0;
358     if (pcs_ptr->parent_pcs_ptr->blk_lambda_tuning) {
359         context_ptr->md_context->blk_geom = context_ptr->blk_geom;
360         context_ptr->md_context->blk_origin_x = context_ptr->blk_origin_x;
361         context_ptr->md_context->blk_origin_y = context_ptr->blk_origin_y;
362         //Get the new lambda for current block
363         set_tuned_blk_lambda(context_ptr->md_context, pcs_ptr);
364     }
365     //**********************************
366     // Luma
367     //**********************************
368     if (component_mask == PICTURE_BUFFER_DESC_FULL_MASK ||
369         component_mask == PICTURE_BUFFER_DESC_LUMA_MASK) {
370         if (context_ptr->md_skip_blk) {
371             count_non_zero_coeffs[0] = 0;
372             eob[0] = 0;
373             context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[0][context_ptr->txb_itr] = 0;
374         }
375         else {
376         svt_residual_kernel8bit(
377             input_samples->buffer_y + input_luma_offset,
378             input_samples->stride_y,
379             pred_samples->buffer_y + pred_luma_offset,
380             pred_samples->stride_y,
381             ((int16_t *)residual16bit->buffer_y) + scratch_luma_offset,
382             residual16bit->stride_y,
383             context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
384             context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr]);
385 
386         av1_estimate_transform(
387             ((int16_t *)residual16bit->buffer_y) + scratch_luma_offset,
388             residual16bit->stride_y,
389             ((TranLow *)transform16bit->buffer_y) + coeff1d_offset,
390             NOT_USED_VALUE,
391             context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
392             &context_ptr->three_quad_energy,
393             EB_8BIT,
394             txb_ptr->transform_type[PLANE_TYPE_Y],
395             PLANE_TYPE_Y,
396             context_ptr->md_context->pf_ctrls.pf_shape);
397 
398         int32_t seg_qp = pcs_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled
399                              ? pcs_ptr->parent_pcs_ptr->frm_hdr.segmentation_params
400                                    .feature_data[context_ptr->blk_ptr->segment_id][SEG_LVL_ALT_Q]
401                              : 0;
402 
403         context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[0][context_ptr->txb_itr] = av1_quantize_inv_quantize(
404             sb_ptr->pcs_ptr,
405             context_ptr->md_context,
406             ((TranLow *)transform16bit->buffer_y) + coeff1d_offset,
407             NOT_USED_VALUE,
408             ((int32_t *)coeff_samples_sb->buffer_y) + coeff1d_offset,
409             ((int32_t *)inverse_quant_buffer->buffer_y) + coeff1d_offset,
410             qindex,
411             seg_qp,
412             context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
413             context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr],
414             context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
415             &eob[0],
416             &(count_non_zero_coeffs[0]),
417             COMPONENT_LUMA,
418             EB_8BIT,
419             txb_ptr->transform_type[PLANE_TYPE_Y],
420             &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
421             context_ptr->md_context->luma_txb_skip_context,
422             context_ptr->md_context->luma_dc_sign_context,
423             blk_ptr->pred_mode,
424             blk_ptr->use_intrabc,
425             context_ptr->md_context->full_lambda_md[EB_8_BIT_MD],
426             EB_TRUE);
427 
428         }
429         context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].y_has_coeff[context_ptr->txb_itr] = count_non_zero_coeffs[0] ? EB_TRUE : EB_FALSE;
430 
431         if (count_non_zero_coeffs[0] == 0) {
432             // INTER. Chroma follows Luma in transform type
433             if (blk_ptr->prediction_mode_flag == INTER_MODE) {
434                 txb_ptr->transform_type[PLANE_TYPE_Y]  = DCT_DCT;
435                 txb_ptr->transform_type[PLANE_TYPE_UV] = DCT_DCT;
436             } else { // INTRA
437                 txb_ptr->transform_type[PLANE_TYPE_Y] = DCT_DCT;
438             }
439         }
440         txb_ptr->nz_coef_count[0] = (uint16_t)count_non_zero_coeffs[0];
441     }
442 
443     if (component_mask == PICTURE_BUFFER_DESC_FULL_MASK ||
444         component_mask == PICTURE_BUFFER_DESC_CHROMA_MASK) {
445         if (blk_ptr->prediction_mode_flag == INTRA_MODE && blk_ptr->prediction_unit_array->intra_chroma_mode == UV_CFL_PRED) {
446             EbPictureBufferDesc *recon_samples = pred_samples;
447             uint32_t             recon_luma_offset =
448                 (recon_samples->origin_y + round_origin_y) * recon_samples->stride_y +
449                 (recon_samples->origin_x + round_origin_x);
450 
451             // Down sample Luma
452             svt_cfl_luma_subsampling_420_lbd(
453                 recon_samples->buffer_y + recon_luma_offset,
454                 recon_samples->stride_y,
455                 context_ptr->md_context->pred_buf_q3,
456                 context_ptr->blk_geom->bwidth_uv == context_ptr->blk_geom->bwidth
457                     ? (context_ptr->blk_geom->bwidth_uv << 1)
458                     : context_ptr->blk_geom->bwidth,
459                 context_ptr->blk_geom->bheight_uv == context_ptr->blk_geom->bheight
460                     ? (context_ptr->blk_geom->bheight_uv << 1)
461                     : context_ptr->blk_geom->bheight);
462             int32_t round_offset =
463                 ((context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr]) *
464                  (context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr])) /
465                 2;
466 
467             svt_subtract_average(
468                 context_ptr->md_context->pred_buf_q3,
469                 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
470                 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
471                 round_offset,
472                 svt_log2f(context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr]) +
473                 svt_log2f(context_ptr->blk_geom
474                               ->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]));
475             if (blk_ptr->prediction_unit_array->intra_chroma_mode == UV_CFL_PRED) {
476                 int32_t alpha_q3 = cfl_idx_to_alpha(blk_ptr->prediction_unit_array->cfl_alpha_idx,
477                                                     blk_ptr->prediction_unit_array->cfl_alpha_signs,
478                                                     CFL_PRED_U); // once for U, once for V
479 
480                 //TOCHANGE
481                 //assert(chroma_size * CFL_BUF_LINE + chroma_size <= CFL_BUF_SQUARE);
482 
483                 svt_cfl_predict_lbd(
484                     context_ptr->md_context->pred_buf_q3,
485                     pred_samples->buffer_cb + pred_cb_offset,
486                     pred_samples->stride_cb,
487                     pred_samples->buffer_cb + pred_cb_offset,
488                     pred_samples->stride_cb,
489                     alpha_q3,
490                     8,
491                     context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
492                     context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]);
493                 alpha_q3 = cfl_idx_to_alpha(blk_ptr->prediction_unit_array->cfl_alpha_idx,
494                                             blk_ptr->prediction_unit_array->cfl_alpha_signs,
495                                             CFL_PRED_V); // once for U, once for V
496 
497                 //TOCHANGE
498                 //assert(chroma_size * CFL_BUF_LINE + chroma_size <= CFL_BUF_SQUARE);
499 
500                 svt_cfl_predict_lbd(
501                     context_ptr->md_context->pred_buf_q3,
502                     pred_samples->buffer_cr + pred_cr_offset,
503                     pred_samples->stride_cr,
504                     pred_samples->buffer_cr + pred_cr_offset,
505                     pred_samples->stride_cr,
506                     alpha_q3,
507                     8,
508                     context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
509                     context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]);
510             }
511         }
512 
513         //**********************************
514         // Chroma
515         //**********************************
516         if (context_ptr->md_skip_blk) {
517             count_non_zero_coeffs[1] = 0;
518             eob[1] = 0;
519            context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[1][context_ptr->txb_itr] = 0;
520             count_non_zero_coeffs[2] = 0;
521             eob[2] = 0;
522            context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[2][context_ptr->txb_itr] = 0;
523         }
524         else {
525 
526             int32_t seg_qp = pcs_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled
527                 ? pcs_ptr->parent_pcs_ptr->frm_hdr.segmentation_params
528                 .feature_data[context_ptr->blk_ptr->segment_id][SEG_LVL_ALT_Q]
529                 : 0;
530             //**********************************
531             // Cb
532             //**********************************
533             svt_residual_kernel8bit(
534                 input_samples->buffer_cb + input_cb_offset,
535                 input_samples->stride_cb,
536                 pred_samples->buffer_cb + pred_cb_offset,
537                 pred_samples->stride_cb,
538                 ((int16_t *)residual16bit->buffer_cb) + scratch_cb_offset,
539                 residual16bit->stride_cb,
540                 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
541                 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]);
542 
543             av1_estimate_transform(
544                 ((int16_t *)residual16bit->buffer_cb) + scratch_cb_offset,
545                 residual16bit->stride_cb,
546                 ((TranLow *)transform16bit->buffer_cb) + context_ptr->coded_area_sb_uv,
547                 NOT_USED_VALUE,
548                 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
549                 &context_ptr->three_quad_energy,
550                 EB_8BIT,
551                 txb_ptr->transform_type[PLANE_TYPE_UV],
552                 PLANE_TYPE_UV,
553                 context_ptr->md_context->pf_ctrls.pf_shape);
554         context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[1][context_ptr->txb_itr] = av1_quantize_inv_quantize(
555                 sb_ptr->pcs_ptr,
556                 context_ptr->md_context,
557                 ((TranLow *)transform16bit->buffer_cb) + context_ptr->coded_area_sb_uv,
558                 NOT_USED_VALUE,
559                 ((int32_t *)coeff_samples_sb->buffer_cb) + context_ptr->coded_area_sb_uv,
560                 ((int32_t *)inverse_quant_buffer->buffer_cb) + context_ptr->coded_area_sb_uv,
561                 qindex,
562                 seg_qp,
563                 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
564                 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
565                 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
566                 &eob[1],
567                 &(count_non_zero_coeffs[1]),
568                 COMPONENT_CHROMA_CB,
569                 EB_8BIT,
570                 txb_ptr->transform_type[PLANE_TYPE_UV],
571                 &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
572                 context_ptr->md_context->cb_txb_skip_context,
573                 context_ptr->md_context->cb_dc_sign_context,
574                 blk_ptr->pred_mode,
575                 blk_ptr->use_intrabc,
576                 context_ptr->md_context->full_lambda_md[EB_8_BIT_MD],
577                 EB_TRUE);
578 
579         //**********************************
580         // Cr
581         //**********************************
582         svt_residual_kernel8bit(
583             input_samples->buffer_cr + input_cr_offset,
584             input_samples->stride_cr,
585             pred_samples->buffer_cr + pred_cr_offset,
586             pred_samples->stride_cr,
587             ((int16_t *)residual16bit->buffer_cr) + scratch_cr_offset,
588             residual16bit->stride_cr,
589             context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
590             context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]);
591         av1_estimate_transform(
592             ((int16_t *)residual16bit->buffer_cr) + scratch_cb_offset,
593             residual16bit->stride_cr,
594             ((TranLow *)transform16bit->buffer_cr) + context_ptr->coded_area_sb_uv,
595             NOT_USED_VALUE,
596             context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
597             &context_ptr->three_quad_energy,
598             EB_8BIT,
599             txb_ptr->transform_type[PLANE_TYPE_UV],
600             PLANE_TYPE_UV,
601             context_ptr->md_context->pf_ctrls.pf_shape);
602         context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[2][context_ptr->txb_itr] = av1_quantize_inv_quantize(
603             sb_ptr->pcs_ptr,
604             context_ptr->md_context,
605             ((TranLow *)transform16bit->buffer_cr) + context_ptr->coded_area_sb_uv,
606             NOT_USED_VALUE,
607             ((int32_t *)coeff_samples_sb->buffer_cr) + context_ptr->coded_area_sb_uv,
608             ((TranLow *)inverse_quant_buffer->buffer_cr) + context_ptr->coded_area_sb_uv,
609             qindex,
610             seg_qp,
611             context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
612             context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
613             context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
614             &eob[2],
615             &(count_non_zero_coeffs[2]),
616             COMPONENT_CHROMA_CR,
617             EB_8BIT,
618             txb_ptr->transform_type[PLANE_TYPE_UV],
619             &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
620             context_ptr->md_context->cr_txb_skip_context,
621             context_ptr->md_context->cr_dc_sign_context,
622             blk_ptr->pred_mode,
623             blk_ptr->use_intrabc,
624             context_ptr->md_context->full_lambda_md[EB_8_BIT_MD],
625             EB_TRUE);
626         }
627 
628         context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].u_has_coeff[context_ptr->txb_itr] = count_non_zero_coeffs[1] ? EB_TRUE : EB_FALSE;
629         context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].v_has_coeff[context_ptr->txb_itr] = count_non_zero_coeffs[2] ? EB_TRUE : EB_FALSE;
630 
631         txb_ptr->nz_coef_count[1] = (uint16_t)count_non_zero_coeffs[1];
632         txb_ptr->nz_coef_count[2] = (uint16_t)count_non_zero_coeffs[2];
633     }
634     return;
635 }
636 /**********************************************************
637 * Encode Loop
638 *
639 * Summary: Performs an AV1 conformant
640 *   Transform, Quantization  and Inverse Quantization of a TU.
641 *
642 * Inputs:
643 *   origin_x
644 *   origin_y
645 *   txb_size
646 *   sb_sz
647 *   input - input samples (position sensitive)
648 *   pred - prediction samples (position independent)
649 *
650 * Outputs:
651 *   Inverse quantized coeff - quantization indices (position sensitive)
652 *
653 **********************************************************/
av1_encode_loop_16bit(PictureControlSet * pcs_ptr,EncDecContext * context_ptr,SuperBlock * sb_ptr,uint32_t origin_x,uint32_t origin_y,EbPictureBufferDesc * pred_samples,EbPictureBufferDesc * coeff_samples_sb,EbPictureBufferDesc * residual16bit,EbPictureBufferDesc * transform16bit,EbPictureBufferDesc * inverse_quant_buffer,uint32_t * count_non_zero_coeffs,uint32_t component_mask,uint16_t * eob)654 static void av1_encode_loop_16bit(PictureControlSet *pcs_ptr, EncDecContext *context_ptr,
655                                   SuperBlock *sb_ptr, uint32_t origin_x, uint32_t origin_y,
656                                   EbPictureBufferDesc *pred_samples, // no basis/offset
657                                   EbPictureBufferDesc *coeff_samples_sb, // sb based
658                                   EbPictureBufferDesc *residual16bit, // no basis/offset
659                                   EbPictureBufferDesc *transform16bit, // no basis/offset
660                                   EbPictureBufferDesc *inverse_quant_buffer,
661                                   uint32_t *count_non_zero_coeffs, uint32_t component_mask,
662                                   uint16_t *eob)
663 
664 {
665 
666     BlkStruct *   blk_ptr = context_ptr->blk_ptr;
667     TransformUnit *txb_ptr = &blk_ptr->txb_array[context_ptr->txb_itr];
668     //    EB_SLICE               slice_type = sb_ptr->pcs_ptr->slice_type;
669     //    uint32_t                 temporal_layer_index = sb_ptr->pcs_ptr->temporal_layer_index;
670     uint32_t             qindex    = blk_ptr->qindex;
671     uint32_t             bit_depth = context_ptr->bit_depth;
672     EbPictureBufferDesc *input_samples16bit = context_ptr->input_sample16bit_buffer;
673     EbPictureBufferDesc *pred_samples16bit  = pred_samples;
674     uint32_t             round_origin_x = (origin_x >> 3) << 3; // for Chroma blocks with size of 4
675     uint32_t             round_origin_y = (origin_y >> 3) << 3; // for Chroma blocks with size of 4
676 
677     int32_t is_inter = (blk_ptr->prediction_mode_flag == INTER_MODE || blk_ptr->use_intrabc)
678                            ? EB_TRUE
679                            : EB_FALSE;
680     const uint32_t input_luma_offset =
681         context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] +
682         context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] *
683             SB_STRIDE_Y;
684     const uint32_t input_cb_offset =
685         ROUND_UV(
686             context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
687             2 +
688         ROUND_UV(
689             context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
690             2 * SB_STRIDE_UV;
691     const uint32_t input_cr_offset =
692         ROUND_UV(
693             context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
694             2 +
695         ROUND_UV(
696             context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
697             2 * SB_STRIDE_UV;
698     const uint32_t pred_luma_offset =
699         ((pred_samples16bit->origin_y + origin_y) * pred_samples16bit->stride_y) +
700         (pred_samples16bit->origin_x + origin_x);
701     const uint32_t pred_cb_offset =
702         (((pred_samples16bit->origin_y + round_origin_y) >> 1) * pred_samples16bit->stride_cb) +
703         ((pred_samples16bit->origin_x + round_origin_x) >> 1);
704     const uint32_t pred_cr_offset =
705         (((pred_samples16bit->origin_y + round_origin_y) >> 1) * pred_samples16bit->stride_cr) +
706         ((pred_samples16bit->origin_x + round_origin_x) >> 1);
707 
708     uint32_t scratch_luma_offset, scratch_cb_offset, scratch_cr_offset;
709 
710     if (bit_depth != EB_8BIT) {
711         scratch_luma_offset =
712             context_ptr->blk_geom->origin_x + context_ptr->blk_geom->origin_y * SB_STRIDE_Y;
713         scratch_cb_offset = ROUND_UV(context_ptr->blk_geom->origin_x) / 2 +
714             ROUND_UV(context_ptr->blk_geom->origin_y) / 2 * SB_STRIDE_UV;
715         scratch_cr_offset = ROUND_UV(context_ptr->blk_geom->origin_x) / 2 +
716             ROUND_UV(context_ptr->blk_geom->origin_y) / 2 * SB_STRIDE_UV;
717     }
718     else {
719         scratch_luma_offset =
720             context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] +
721             context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] *
722             SB_STRIDE_Y;
723         scratch_cb_offset =
724             ROUND_UV(context_ptr->blk_geom
725                 ->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
726             2 +
727             ROUND_UV(context_ptr->blk_geom
728                 ->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
729             2 * SB_STRIDE_UV;
730         scratch_cr_offset =
731             ROUND_UV(context_ptr->blk_geom
732                 ->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
733             2 +
734             ROUND_UV(context_ptr->blk_geom
735                 ->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr]) /
736             2 * SB_STRIDE_UV;
737         context_ptr->three_quad_energy = 0;
738     }
739     const uint32_t coeff1d_offset = context_ptr->coded_area_sb;
740     const uint32_t coeff1d_offset_chroma = context_ptr->coded_area_sb_uv;
741     UNUSED(coeff1d_offset_chroma);
742 
743     if (pcs_ptr->parent_pcs_ptr->blk_lambda_tuning) {
744         context_ptr->md_context->blk_geom = context_ptr->blk_geom;
745         context_ptr->md_context->blk_origin_x = context_ptr->blk_origin_x;
746         context_ptr->md_context->blk_origin_y = context_ptr->blk_origin_y;
747         //Get the new lambda for current block
748         set_tuned_blk_lambda(context_ptr->md_context, pcs_ptr);
749     }
750     {
751         //**********************************
752         // Luma
753         //**********************************
754         if (component_mask == PICTURE_BUFFER_DESC_FULL_MASK ||
755             component_mask == PICTURE_BUFFER_DESC_LUMA_MASK) {
756             if (context_ptr->md_skip_blk) {
757                 count_non_zero_coeffs[0] = 0;
758                 eob[0] = 0;
759                 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[0][context_ptr->txb_itr] = 0;
760             }
761             else {
762             svt_residual_kernel16bit(
763                 ((uint16_t *)input_samples16bit->buffer_y) + input_luma_offset,
764                 input_samples16bit->stride_y,
765                 ((uint16_t *)pred_samples16bit->buffer_y) + pred_luma_offset,
766                 pred_samples16bit->stride_y,
767                 ((int16_t *)residual16bit->buffer_y) + scratch_luma_offset,
768                 residual16bit->stride_y,
769                 context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
770                 context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr]);
771             av1_estimate_transform(
772                 ((int16_t *)residual16bit->buffer_y) + scratch_luma_offset,
773                 residual16bit->stride_y,
774                 ((TranLow *)transform16bit->buffer_y) + coeff1d_offset,
775                 NOT_USED_VALUE,
776                 context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
777                 &context_ptr->three_quad_energy,
778                 bit_depth,
779                 txb_ptr->transform_type[PLANE_TYPE_Y],
780                 PLANE_TYPE_Y,
781                 context_ptr->md_context->pf_ctrls.pf_shape);
782 
783             int32_t seg_qp =
784                 pcs_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled
785                     ? pcs_ptr->parent_pcs_ptr->frm_hdr.segmentation_params
786                           .feature_data[context_ptr->blk_ptr->segment_id][SEG_LVL_ALT_Q]
787                     : 0;
788             context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[0][context_ptr->txb_itr] = av1_quantize_inv_quantize(
789                 sb_ptr->pcs_ptr,
790                 context_ptr->md_context,
791                 ((int32_t *)transform16bit->buffer_y) + coeff1d_offset,
792                 NOT_USED_VALUE,
793                 ((int32_t *)coeff_samples_sb->buffer_y) + coeff1d_offset,
794                 ((int32_t *)inverse_quant_buffer->buffer_y) + coeff1d_offset,
795                 qindex,
796                 seg_qp,
797                 context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
798                 context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr],
799                 context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
800                 &eob[0],
801                 &(count_non_zero_coeffs[0]),
802                 COMPONENT_LUMA,
803                 bit_depth,
804                 txb_ptr->transform_type[PLANE_TYPE_Y],
805                 &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
806                 context_ptr->md_context->luma_txb_skip_context,
807                 context_ptr->md_context->luma_dc_sign_context,
808                 blk_ptr->pred_mode,
809                 blk_ptr->use_intrabc,
810                 context_ptr->md_context->full_lambda_md[(bit_depth == EB_10BIT) ? EB_10_BIT_MD : EB_8_BIT_MD],
811                 EB_TRUE);
812             }
813             context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].y_has_coeff[context_ptr->txb_itr] = count_non_zero_coeffs[0] ? EB_TRUE : EB_FALSE;
814 
815             if (count_non_zero_coeffs[0] == 0) {
816                 // INTER. Chroma follows Luma in transform type
817                 if (blk_ptr->prediction_mode_flag == INTER_MODE) {
818                     txb_ptr->transform_type[PLANE_TYPE_Y]  = DCT_DCT;
819                     txb_ptr->transform_type[PLANE_TYPE_UV] = DCT_DCT;
820                 } else { // INTRA
821                     txb_ptr->transform_type[PLANE_TYPE_Y] = DCT_DCT;
822                 }
823             }
824 
825             txb_ptr->nz_coef_count[0] = (uint16_t)count_non_zero_coeffs[0];
826         }
827         if (component_mask == PICTURE_BUFFER_DESC_FULL_MASK ||
828             component_mask == PICTURE_BUFFER_DESC_CHROMA_MASK) {
829 
830         if (blk_ptr->prediction_mode_flag == INTRA_MODE &&
831             blk_ptr->prediction_unit_array->intra_chroma_mode == UV_CFL_PRED) {
832             EbPictureBufferDesc *recon_samples = pred_samples16bit;
833 
834             uint32_t recon_luma_offset =
835                 (recon_samples->origin_y + round_origin_y) * recon_samples->stride_y +
836                 (recon_samples->origin_x + round_origin_x);
837 
838             // Down sample Luma
839             svt_cfl_luma_subsampling_420_hbd(
840                 ((uint16_t *)recon_samples->buffer_y) + recon_luma_offset,
841                 recon_samples->stride_y,
842                 context_ptr->md_context->pred_buf_q3,
843                 context_ptr->blk_geom->bwidth_uv == context_ptr->blk_geom->bwidth
844                     ? (context_ptr->blk_geom->bwidth_uv << 1)
845                     : context_ptr->blk_geom->bwidth,
846                 context_ptr->blk_geom->bheight_uv == context_ptr->blk_geom->bheight
847                     ? (context_ptr->blk_geom->bheight_uv << 1)
848                     : context_ptr->blk_geom->bheight);
849             int32_t round_offset =
850                 ((context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr]) *
851                  (context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr])) /
852                 2;
853 
854             svt_subtract_average(
855                 context_ptr->md_context->pred_buf_q3,
856                 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
857                 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
858                 round_offset,
859                 svt_log2f(context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr]) +
860                 svt_log2f(context_ptr->blk_geom
861                               ->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]));
862 
863             int32_t alpha_q3 = cfl_idx_to_alpha(blk_ptr->prediction_unit_array->cfl_alpha_idx,
864                                                 blk_ptr->prediction_unit_array->cfl_alpha_signs,
865                                                 CFL_PRED_U); // once for U, once for V
866             // TOCHANGE
867             // assert(chroma_size * CFL_BUF_LINE + chroma_size <=                CFL_BUF_SQUARE);
868 
869             svt_cfl_predict_hbd(
870                 context_ptr->md_context->pred_buf_q3,
871                 ((uint16_t *)pred_samples16bit->buffer_cb) + pred_cb_offset,
872                 pred_samples16bit->stride_cb,
873                 ((uint16_t *)pred_samples16bit->buffer_cb) + pred_cb_offset,
874                 pred_samples16bit->stride_cb,
875                 alpha_q3,
876                 context_ptr->bit_depth,
877                 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
878                 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]);
879 
880             alpha_q3 = cfl_idx_to_alpha(blk_ptr->prediction_unit_array->cfl_alpha_idx,
881                                         blk_ptr->prediction_unit_array->cfl_alpha_signs,
882                                         CFL_PRED_V); // once for U, once for V
883             // TOCHANGE
884             //assert(chroma_size * CFL_BUF_LINE + chroma_size <=                CFL_BUF_SQUARE);
885 
886             svt_cfl_predict_hbd(
887                 context_ptr->md_context->pred_buf_q3,
888                 ((uint16_t *)pred_samples16bit->buffer_cr) + pred_cr_offset,
889                 pred_samples16bit->stride_cr,
890                 ((uint16_t *)pred_samples16bit->buffer_cr) + pred_cr_offset,
891                 pred_samples16bit->stride_cr,
892                 alpha_q3,
893                 context_ptr->bit_depth,
894                 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
895                 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]);
896         }
897 
898             //**********************************
899             // Chroma
900             //**********************************
901         if (context_ptr->md_skip_blk) {
902             count_non_zero_coeffs[1] = 0;
903             eob[1] = 0;
904            context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[1][context_ptr->txb_itr] = 0;
905 
906             count_non_zero_coeffs[2] = 0;
907             eob[2] = 0;
908            context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[2][context_ptr->txb_itr] = 0;
909         }
910         else {
911             int32_t seg_qp =
912                 pcs_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled
913                 ? pcs_ptr->parent_pcs_ptr->frm_hdr.segmentation_params
914                 .feature_data[context_ptr->blk_ptr->segment_id][SEG_LVL_ALT_Q]
915                 : 0;
916             //**********************************
917             // Cb
918             //**********************************
919             svt_residual_kernel16bit(
920                 ((uint16_t *)input_samples16bit->buffer_cb) + input_cb_offset,
921                 input_samples16bit->stride_cb,
922                 ((uint16_t *)pred_samples16bit->buffer_cb) + pred_cb_offset,
923                 pred_samples16bit->stride_cb,
924                 ((int16_t *)residual16bit->buffer_cb) + scratch_cb_offset,
925                 residual16bit->stride_cb,
926                 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
927                 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]);
928             av1_estimate_transform(
929                 ((int16_t *)residual16bit->buffer_cb) + scratch_cb_offset,
930                 residual16bit->stride_cb,
931                 ((TranLow *)transform16bit->buffer_cb) + context_ptr->coded_area_sb_uv,
932                 NOT_USED_VALUE,
933                 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
934                 &context_ptr->three_quad_energy,
935                 bit_depth,
936                 txb_ptr->transform_type[PLANE_TYPE_UV],
937                 PLANE_TYPE_UV,
938                 context_ptr->md_context->pf_ctrls.pf_shape);
939             context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[1][context_ptr->txb_itr] = av1_quantize_inv_quantize(
940                 sb_ptr->pcs_ptr,
941                 context_ptr->md_context,
942                 ((int32_t *)transform16bit->buffer_cb) + context_ptr->coded_area_sb_uv,
943                 NOT_USED_VALUE,
944                 ((int32_t *)coeff_samples_sb->buffer_cb) + context_ptr->coded_area_sb_uv,
945                 ((int32_t *)inverse_quant_buffer->buffer_cb) + context_ptr->coded_area_sb_uv,
946                 qindex,
947                 seg_qp,
948                 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
949                 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
950                 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
951                 &eob[1],
952                 &(count_non_zero_coeffs[1]),
953                 COMPONENT_CHROMA_CB,
954                 bit_depth,
955                 txb_ptr->transform_type[PLANE_TYPE_UV],
956                 &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
957                 context_ptr->md_context->cb_txb_skip_context,
958                 context_ptr->md_context->cb_dc_sign_context,
959                 blk_ptr->pred_mode,
960                 blk_ptr->use_intrabc,
961                 context_ptr->md_context->full_lambda_md[(bit_depth == EB_10BIT) ? EB_10_BIT_MD : EB_8_BIT_MD],
962                 EB_TRUE);
963 
964             //**********************************
965             // Cr
966             //**********************************
967             svt_residual_kernel16bit(
968                 ((uint16_t *)input_samples16bit->buffer_cr) + input_cr_offset,
969                 input_samples16bit->stride_cr,
970                 ((uint16_t *)pred_samples16bit->buffer_cr) + pred_cr_offset,
971                 pred_samples16bit->stride_cr,
972                 ((int16_t *)residual16bit->buffer_cr) + scratch_cr_offset,
973                 residual16bit->stride_cr,
974                 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
975                 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr]);
976             av1_estimate_transform(
977                 ((int16_t *)residual16bit->buffer_cr) + scratch_cb_offset,
978                 residual16bit->stride_cr,
979                 ((TranLow *)transform16bit->buffer_cr) + context_ptr->coded_area_sb_uv,
980                 NOT_USED_VALUE,
981                 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
982                 &context_ptr->three_quad_energy,
983                 bit_depth,
984                 txb_ptr->transform_type[PLANE_TYPE_UV],
985                 PLANE_TYPE_UV,
986                 context_ptr->md_context->pf_ctrls.pf_shape);
987             context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[2][context_ptr->txb_itr] = av1_quantize_inv_quantize(
988                 sb_ptr->pcs_ptr,
989                 context_ptr->md_context,
990                 ((int32_t *)transform16bit->buffer_cr) + context_ptr->coded_area_sb_uv,
991                 NOT_USED_VALUE,
992                 ((int32_t *)coeff_samples_sb->buffer_cr) + context_ptr->coded_area_sb_uv,
993                 ((int32_t *)inverse_quant_buffer->buffer_cr) + context_ptr->coded_area_sb_uv,
994                 qindex,
995                 seg_qp,
996                 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
997                 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
998                 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
999                 &eob[2],
1000                 &(count_non_zero_coeffs[2]),
1001                 COMPONENT_CHROMA_CR,
1002                 bit_depth,
1003                 txb_ptr->transform_type[PLANE_TYPE_UV],
1004                 &(context_ptr->md_context->candidate_buffer_ptr_array[0][0]),
1005                 context_ptr->md_context->cr_txb_skip_context,
1006                 context_ptr->md_context->cr_dc_sign_context,
1007                 blk_ptr->pred_mode,
1008                 blk_ptr->use_intrabc,
1009                 context_ptr->md_context->full_lambda_md[(bit_depth == EB_10BIT) ? EB_10_BIT_MD : EB_8_BIT_MD],
1010                 EB_TRUE);
1011             }
1012             context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].u_has_coeff[context_ptr->txb_itr] = count_non_zero_coeffs[1] ? EB_TRUE : EB_FALSE;
1013             context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].v_has_coeff[context_ptr->txb_itr] = count_non_zero_coeffs[2] ? EB_TRUE : EB_FALSE;
1014 
1015             txb_ptr->nz_coef_count[1] = (uint16_t)count_non_zero_coeffs[1];
1016             txb_ptr->nz_coef_count[2] = (uint16_t)count_non_zero_coeffs[2];
1017         }
1018     }
1019 
1020     return;
1021 }
1022 
1023 /**********************************************************
1024 * Encode Generate Recon
1025 *
1026 * Summary: Performs an AV1 conformant
1027 *   Inverse Transform and generate
1028 *   the reconstructed samples of a TU.
1029 *
1030 * Inputs:
1031 *   origin_x
1032 *   origin_y
1033 *   txb_size
1034 *   sb_sz
1035 *   input - Inverse Qunatized Coeff (position sensitive)
1036 *   pred - prediction samples (position independent)
1037 *
1038 * Outputs:
1039 *   Recon  (position independent)
1040 *
1041 **********************************************************/
av1_encode_generate_recon(EncDecContext * context_ptr,uint32_t origin_x,uint32_t origin_y,EbPictureBufferDesc * pred_samples,EbPictureBufferDesc * residual16bit,uint32_t component_mask,uint16_t * eob)1042 static void av1_encode_generate_recon(EncDecContext *context_ptr, uint32_t origin_x,
1043                                       uint32_t             origin_y,
1044                                       EbPictureBufferDesc *pred_samples, // no basis/offset
1045                                       EbPictureBufferDesc *residual16bit, // no basis/offset
1046                                       uint32_t component_mask, uint16_t *eob)
1047 {
1048     BlkStruct *   blk_ptr = context_ptr->blk_ptr;
1049     TransformUnit *txb_ptr = &blk_ptr->txb_array[context_ptr->txb_itr];
1050 
1051     // *Note - The prediction is built in-place in the Recon buffer. It is overwritten with Reconstructed
1052     //   samples if the CBF==1 && SKIP==False
1053 
1054     //**********************************
1055     // Luma
1056     //**********************************
1057     if (component_mask & PICTURE_BUFFER_DESC_LUMA_MASK) {
1058         {
1059             uint32_t pred_luma_offset = (pred_samples->origin_y + origin_y) *
1060                     pred_samples->stride_y +
1061                 (pred_samples->origin_x + origin_x);
1062             if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
1063                         .y_has_coeff[context_ptr->txb_itr] == EB_TRUE &&
1064                 blk_ptr->skip_flag == EB_FALSE) {
1065                 uint8_t *pred_buffer = pred_samples->buffer_y + pred_luma_offset;
1066                 av1_inv_transform_recon8bit(
1067                     ((int32_t *)residual16bit->buffer_y) + context_ptr->coded_area_sb,
1068                     pred_buffer,
1069                     pred_samples->stride_y,
1070                     pred_buffer,
1071                     pred_samples->stride_y,
1072                     context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
1073                     txb_ptr->transform_type[PLANE_TYPE_Y],
1074                     PLANE_TYPE_Y,
1075                     eob[0],
1076                     0 /*lossless*/
1077                 );
1078             }
1079         }
1080     }
1081 
1082     if (component_mask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
1083         //**********************************
1084         // Chroma
1085         //**********************************
1086 
1087         uint32_t round_origin_x = (origin_x >> 3) << 3; // for Chroma blocks with size of 4
1088         uint32_t round_origin_y = (origin_y >> 3) << 3; // for Chroma blocks with size of 4
1089         uint32_t pred_chroma_offset = (((pred_samples->origin_y + round_origin_y) >> 1) *
1090                                        pred_samples->stride_cb) +
1091             ((pred_samples->origin_x + round_origin_x) >> 1);
1092 
1093         //**********************************
1094         // Cb
1095         //**********************************
1096         if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
1097                     .u_has_coeff[context_ptr->txb_itr] == EB_TRUE &&
1098             blk_ptr->skip_flag == EB_FALSE) {
1099             uint8_t *pred_buffer = pred_samples->buffer_cb + pred_chroma_offset;
1100 
1101             av1_inv_transform_recon8bit(
1102                 ((int32_t *)residual16bit->buffer_cb) + context_ptr->coded_area_sb_uv,
1103                 pred_buffer,
1104                 pred_samples->stride_cb,
1105                 pred_buffer,
1106                 pred_samples->stride_cb,
1107                 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1108                 txb_ptr->transform_type[PLANE_TYPE_UV],
1109                 PLANE_TYPE_UV,
1110                 eob[1],
1111                 0 /*lossless*/);
1112         }
1113 
1114         //**********************************
1115         // Cr
1116         //**********************************
1117         pred_chroma_offset =
1118             (((pred_samples->origin_y + round_origin_y) >> 1) * pred_samples->stride_cr) +
1119             ((pred_samples->origin_x + round_origin_x) >> 1);
1120 
1121         if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
1122                     .v_has_coeff[context_ptr->txb_itr] == EB_TRUE &&
1123             blk_ptr->skip_flag == EB_FALSE) {
1124             uint8_t *pred_buffer = pred_samples->buffer_cr + pred_chroma_offset;
1125 
1126             av1_inv_transform_recon8bit(
1127                 ((int32_t *)residual16bit->buffer_cr) + context_ptr->coded_area_sb_uv,
1128                 pred_buffer,
1129                 pred_samples->stride_cr,
1130                 pred_buffer,
1131                 pred_samples->stride_cr,
1132                 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1133                 txb_ptr->transform_type[PLANE_TYPE_UV],
1134                 PLANE_TYPE_UV,
1135                 eob[2],
1136                 0 /*lossless*/);
1137         }
1138     }
1139 
1140     return;
1141 }
1142 
1143 /**********************************************************
1144 * Encode Generate Recon
1145 *
1146 * Summary: Performs an AV1 conformant
1147 *   Inverse Transform and generate
1148 *   the reconstructed samples of a TU.
1149 *
1150 * Inputs:
1151 *   origin_x
1152 *   origin_y
1153 *   txb_size
1154 *   sb_sz
*   input - Inverse Quantized Coeff (position sensitive)
1156 *   pred - prediction samples (position independent)
1157 *
1158 * Outputs:
1159 *   Recon  (position independent)
1160 *
1161 **********************************************************/
static void av1_encode_generate_recon_16bit(EncDecContext *context_ptr, uint32_t origin_x,
                                            uint32_t             origin_y,
                                            EbPictureBufferDesc *pred_samples, // no basis/offset
                                            EbPictureBufferDesc *residual16bit, // no basis/offset
                                            uint32_t component_mask, uint16_t *eob) {
    // 16-bit sample path. For every plane selected by component_mask,
    // av1_inv_transform_recon() is called with the same sample pointer as both
    // its prediction and its reconstruction argument, so pred_samples is
    // updated in place. A plane is left untouched unless its has_coeff entry
    // for the current transform block is EB_TRUE and skip_flag is EB_FALSE.
    BlkStruct *    blk_ptr = context_ptr->blk_ptr;
    TransformUnit *txb_ptr = &blk_ptr->txb_array[context_ptr->txb_itr];

    //**********************************
    // Luma
    //**********************************
    if (component_mask & PICTURE_BUFFER_DESC_LUMA_MASK) {
        const uint32_t luma_row    = pred_samples->origin_y + origin_y;
        const uint32_t luma_col    = pred_samples->origin_x + origin_x;
        const uint32_t luma_offset = luma_row * pred_samples->stride_y + luma_col;
        const int      recon_luma =
            context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
                    .y_has_coeff[context_ptr->txb_itr] == EB_TRUE &&
            blk_ptr->skip_flag == EB_FALSE;

        if (recon_luma) {
            uint16_t *luma_samples = (uint16_t *)pred_samples->buffer_y + luma_offset;

            av1_inv_transform_recon(
                (int32_t *)residual16bit->buffer_y + context_ptr->coded_area_sb,
                CONVERT_TO_BYTEPTR(luma_samples),
                pred_samples->stride_y,
                CONVERT_TO_BYTEPTR(luma_samples),
                pred_samples->stride_y,
                context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
                context_ptr->bit_depth,
                txb_ptr->transform_type[PLANE_TYPE_Y],
                PLANE_TYPE_Y,
                eob[0],
                0 /*lossless*/);
        }
    }

    //**********************************
    // Chroma
    //**********************************
    if (component_mask & PICTURE_BUFFER_DESC_CHROMA_MASK) {
        // Luma position aligned down to a multiple of 8 (for Chroma blocks with size of 4),
        // then halved to obtain the chroma row / column.
        const uint32_t aligned_x  = origin_x & ~(uint32_t)7;
        const uint32_t aligned_y  = origin_y & ~(uint32_t)7;
        const uint32_t chroma_row = (pred_samples->origin_y + aligned_y) >> 1;
        const uint32_t chroma_col = (pred_samples->origin_x + aligned_x) >> 1;

        //**********************************
        // Cb
        //**********************************
        const int recon_cb =
            context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
                    .u_has_coeff[context_ptr->txb_itr] == EB_TRUE &&
            blk_ptr->skip_flag == EB_FALSE;

        if (recon_cb) {
            const uint32_t cb_offset  = chroma_row * pred_samples->stride_cb + chroma_col;
            uint16_t *     cb_samples = (uint16_t *)pred_samples->buffer_cb + cb_offset;

            av1_inv_transform_recon(
                (int32_t *)residual16bit->buffer_cb + context_ptr->coded_area_sb_uv,
                CONVERT_TO_BYTEPTR(cb_samples),
                pred_samples->stride_cb,
                CONVERT_TO_BYTEPTR(cb_samples),
                pred_samples->stride_cb,
                context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
                context_ptr->bit_depth,
                txb_ptr->transform_type[PLANE_TYPE_UV],
                PLANE_TYPE_UV,
                eob[1],
                0 /*lossless*/);
        }

        //**********************************
        // Cr
        //**********************************
        const int recon_cr =
            context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
                    .v_has_coeff[context_ptr->txb_itr] == EB_TRUE &&
            blk_ptr->skip_flag == EB_FALSE;

        if (recon_cr) {
            const uint32_t cr_offset  = chroma_row * pred_samples->stride_cr + chroma_col;
            uint16_t *     cr_samples = (uint16_t *)pred_samples->buffer_cr + cr_offset;

            av1_inv_transform_recon(
                (int32_t *)residual16bit->buffer_cr + context_ptr->coded_area_sb_uv,
                CONVERT_TO_BYTEPTR(cr_samples),
                pred_samples->stride_cr,
                CONVERT_TO_BYTEPTR(cr_samples),
                pred_samples->stride_cr,
                context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
                context_ptr->bit_depth,
                txb_ptr->transform_type[PLANE_TYPE_UV],
                PLANE_TYPE_UV,
                eob[2],
                0 /*lossless*/);
        }
    }
}
// Encode-loop dispatch table. perform_intra_coding_loop() indexes it with the
// is_16bit flag: entry 0 is av1_encode_loop, entry 1 is av1_encode_loop_16bit.
static EbAv1EncodeLoopFuncPtr av1_encode_loop_func_table[2] = {av1_encode_loop,
                                                               av1_encode_loop_16bit};

// Recon-generation dispatch table, indexed the same way (0: av1_encode_generate_recon,
// 1: av1_encode_generate_recon_16bit). Not declared static, so it has external linkage.
EbAv1GenerateReconFuncPtr av1_enc_gen_recon_func_ptr[2] = {av1_encode_generate_recon,
                                                           av1_encode_generate_recon_16bit};
1262 
/**********************************************************
* Store 16bit Input Source
*
* Summary: Copies a block of 16-bit samples, plane by plane,
*   from input_sample16bit_buffer into
*   pcs_ptr->input_frame16bit.
*
* Inputs:
*   input_sample16bit_buffer - source; every plane is read
*       starting at the first sample of its buffer
*   pcs_ptr - owner of the destination input_frame16bit
*   sb_x, sb_y - luma position of the block in the
*       destination (the destination origin_x / origin_y
*       is added on top)
*   sb_w, sb_h - luma width / height of the block, in samples
*
* Outputs:
*   pcs_ptr->input_frame16bit Y, Cb and Cr planes; Cb and Cr
*   use the position and size halved in both dimensions.
**********************************************************/
void store16bit_input_src(EbPictureBufferDesc *input_sample16bit_buffer, PictureControlSet *pcs_ptr,
                          uint32_t sb_x, uint32_t sb_y, uint32_t sb_w, uint32_t sb_h) {
    uint32_t  row_it;
    uint16_t *from_ptr;
    uint16_t *to_ptr;

    // Luma
    from_ptr = (uint16_t *)input_sample16bit_buffer->buffer_y;
    to_ptr   = (uint16_t *)pcs_ptr->input_frame16bit->buffer_y +
             (sb_x + pcs_ptr->input_frame16bit->origin_x) +
             (sb_y + pcs_ptr->input_frame16bit->origin_y) * pcs_ptr->input_frame16bit->stride_y;

    for (row_it = 0; row_it < sb_h; row_it++)
        svt_memcpy(to_ptr + row_it * pcs_ptr->input_frame16bit->stride_y,
               from_ptr + row_it * input_sample16bit_buffer->stride_y,
               sb_w * sizeof(uint16_t));

    // Chroma planes: position and size are halved in both dimensions.
    sb_x = sb_x / 2;
    sb_y = sb_y / 2;
    sb_w = sb_w / 2;
    sb_h = sb_h / 2;

    // Cb
    from_ptr = (uint16_t *)input_sample16bit_buffer->buffer_cb;
    to_ptr =
        (uint16_t *)pcs_ptr->input_frame16bit->buffer_cb +
        (sb_x + pcs_ptr->input_frame16bit->origin_x / 2) +
        (sb_y + pcs_ptr->input_frame16bit->origin_y / 2) * pcs_ptr->input_frame16bit->stride_cb;

    for (row_it = 0; row_it < sb_h; row_it++)
        svt_memcpy(to_ptr + row_it * pcs_ptr->input_frame16bit->stride_cb,
               from_ptr + row_it * input_sample16bit_buffer->stride_cb,
               sb_w * sizeof(uint16_t));

    // Cr: the base offset must use the Cr stride, matching the row stepping below
    // (it previously used stride_cb).
    from_ptr = (uint16_t *)input_sample16bit_buffer->buffer_cr;
    to_ptr =
        (uint16_t *)pcs_ptr->input_frame16bit->buffer_cr +
        (sb_x + pcs_ptr->input_frame16bit->origin_x / 2) +
        (sb_y + pcs_ptr->input_frame16bit->origin_y / 2) * pcs_ptr->input_frame16bit->stride_cr;

    for (row_it = 0; row_it < sb_h; row_it++)
        svt_memcpy(to_ptr + row_it * pcs_ptr->input_frame16bit->stride_cr,
               from_ptr + row_it * input_sample16bit_buffer->stride_cr,
               sb_w * sizeof(uint16_t));
}
1306 
// Forward declarations; the definitions are not visible in this part of the file.
void update_mi_map_skip_settings(BlkStruct *blk_ptr);
void move_blk_data(PictureControlSet *pcs, EncDecContext *context_ptr, BlkStruct *src_cu,
                   BlkStruct *dst_cu);
1310 
perform_intra_coding_loop(PictureControlSet * pcs_ptr,SuperBlock * sb_ptr,uint32_t sb_addr,BlkStruct * blk_ptr,PredictionUnit * pu_ptr,EncDecContext * context_ptr)1311 void perform_intra_coding_loop(PictureControlSet *pcs_ptr, SuperBlock *sb_ptr, uint32_t sb_addr,
1312                                BlkStruct *blk_ptr, PredictionUnit *pu_ptr,
1313                                EncDecContext *context_ptr) {
1314     EbBool is_16bit = context_ptr->is_16bit;
1315     uint32_t bit_depth = context_ptr->bit_depth;
1316     uint8_t is_inter = 0; // set to 0 b/c this is the intra path
1317     EbPictureBufferDesc *recon_buffer;
1318     EbPictureBufferDesc *coeff_buffer_sb = pcs_ptr->parent_pcs_ptr->enc_dec_ptr->quantized_coeff[sb_addr];
1319     uint16_t tile_idx = context_ptr->tile_index;
1320     NeighborArrayUnit *ep_luma_recon_neighbor_array =
1321         is_16bit ? pcs_ptr->ep_luma_recon_neighbor_array16bit[tile_idx]
1322                  : pcs_ptr->ep_luma_recon_neighbor_array[tile_idx];
1323     NeighborArrayUnit *ep_cb_recon_neighbor_array =
1324         is_16bit ? pcs_ptr->ep_cb_recon_neighbor_array16bit[tile_idx]
1325                  : pcs_ptr->ep_cb_recon_neighbor_array[tile_idx];
1326     NeighborArrayUnit *ep_cr_recon_neighbor_array =
1327         is_16bit ? pcs_ptr->ep_cr_recon_neighbor_array16bit[tile_idx]
1328                  : pcs_ptr->ep_cr_recon_neighbor_array[tile_idx];
1329 
1330     EbPictureBufferDesc *residual_buffer           = context_ptr->residual_buffer;
1331     EbPictureBufferDesc *transform_buffer          = context_ptr->transform_buffer;
1332     EbPictureBufferDesc *inverse_quant_buffer      = context_ptr->inverse_quant_buffer;
1333 
1334     uint32_t        count_non_zero_coeffs[3];
1335     uint16_t        eobs[MAX_TXB_COUNT][3];
1336     uint64_t        y_txb_coeff_bits;
1337     uint64_t        cb_txb_coeff_bits;
1338     uint64_t        cr_txb_coeff_bits;
1339 
1340     if (pcs_ptr->parent_pcs_ptr->is_used_as_reference_flag == EB_TRUE)
1341         //get the 16bit form of the input SB
1342         if (is_16bit)
1343             recon_buffer = ((EbReferenceObject *)
1344                                 pcs_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)
1345                                ->reference_picture16bit;
1346         else
1347             recon_buffer = ((EbReferenceObject *)
1348                                 pcs_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)
1349                                ->reference_picture;
1350     else // non ref pictures
1351         recon_buffer = is_16bit ? pcs_ptr->parent_pcs_ptr->enc_dec_ptr->recon_picture16bit_ptr : pcs_ptr->parent_pcs_ptr->enc_dec_ptr->recon_picture_ptr;
1352     uint32_t tot_tu = context_ptr->blk_geom->txb_count[blk_ptr->tx_depth];
1353 
1354     // Luma path
1355     for (context_ptr->txb_itr = 0; context_ptr->txb_itr < tot_tu; context_ptr->txb_itr++) {
1356         uint16_t txb_origin_x =
1357             context_ptr->blk_origin_x +
1358             context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1359             context_ptr->blk_geom->origin_x;
1360         uint16_t txb_origin_y =
1361             context_ptr->blk_origin_y +
1362             context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1363             context_ptr->blk_geom->origin_y;
1364         context_ptr->md_context->luma_txb_skip_context = 0;
1365         context_ptr->md_context->luma_dc_sign_context  = 0;
1366         get_txb_ctx(pcs_ptr,
1367                     COMPONENT_LUMA,
1368                     pcs_ptr->ep_luma_dc_sign_level_coeff_neighbor_array[tile_idx],
1369                     txb_origin_x,
1370                     txb_origin_y,
1371                     context_ptr->blk_geom->bsize,
1372                     context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
1373                     &context_ptr->md_context->luma_txb_skip_context,
1374                     &context_ptr->md_context->luma_dc_sign_context);
1375         if (is_16bit) {
1376             uint16_t       top_neigh_array[64 * 2 + 1];
1377             uint16_t       left_neigh_array[64 * 2 + 1];
1378             PredictionMode mode;
1379 
1380             TxSize tx_size = context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr];
1381 
1382             if (txb_origin_y != 0)
1383                 svt_memcpy(top_neigh_array + 1,
1384                        (uint16_t *)(ep_luma_recon_neighbor_array->top_array) + txb_origin_x,
1385                        context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr] *
1386                            2 * sizeof(uint16_t));
1387             if (txb_origin_x != 0)
1388                 svt_memcpy(left_neigh_array + 1,
1389                        (uint16_t *)(ep_luma_recon_neighbor_array->left_array) + txb_origin_y,
1390                        context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr] *
1391                            2 * sizeof(uint16_t));
1392             if (txb_origin_y != 0 && txb_origin_x != 0)
1393                 top_neigh_array[0] = left_neigh_array[0] =
1394                     ((uint16_t *)(ep_luma_recon_neighbor_array->top_left_array) +
1395                      MAX_PICTURE_HEIGHT_SIZE + txb_origin_x - txb_origin_y)[0];
1396 
1397             mode = blk_ptr->pred_mode;
1398 
1399             svt_av1_predict_intra_block_16bit(
1400                 bit_depth,
1401                 ED_STAGE,
1402                 context_ptr->blk_geom,
1403                 context_ptr->blk_ptr->av1xd,
1404                 context_ptr->blk_geom->bwidth,
1405                 context_ptr->blk_geom->bheight,
1406                 tx_size,
1407                 mode,
1408                 pu_ptr->angle_delta[PLANE_TYPE_Y],
1409                 blk_ptr->palette_info.pmi.palette_size[0] > 0,
1410                 &blk_ptr->palette_info,
1411                 blk_ptr->filter_intra_mode,
1412                 top_neigh_array + 1,
1413                 left_neigh_array + 1,
1414                 recon_buffer,
1415                 (context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1416                  context_ptr->blk_geom->origin_x) >>
1417                     2,
1418                 (context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1419                  context_ptr->blk_geom->origin_y) >>
1420                     2,
1421                 0,
1422                 context_ptr->blk_geom->bsize,
1423                 txb_origin_x,
1424                 txb_origin_y,
1425                 context_ptr->blk_origin_x,
1426                 context_ptr->blk_origin_y,
1427                 0,
1428                 0,
1429                 &((SequenceControlSet *)pcs_ptr->scs_wrapper_ptr->object_ptr)->seq_header);
1430         } else {
1431             uint8_t        top_neigh_array[64 * 2 + 1];
1432             uint8_t        left_neigh_array[64 * 2 + 1];
1433             PredictionMode mode;
1434 
1435             TxSize tx_size = context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr];
1436 
1437             if (txb_origin_y != 0)
1438                 svt_memcpy(
1439                     top_neigh_array + 1,
1440                     ep_luma_recon_neighbor_array->top_array + txb_origin_x,
1441                     context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr] * 2);
1442 
1443             if (txb_origin_x != 0)
1444                 svt_memcpy(
1445                     left_neigh_array + 1,
1446                     ep_luma_recon_neighbor_array->left_array + txb_origin_y,
1447                     context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr] * 2);
1448 
1449             if (txb_origin_y != 0 && txb_origin_x != 0)
1450                 top_neigh_array[0] = left_neigh_array[0] =
1451                     ep_luma_recon_neighbor_array
1452                         ->top_left_array[MAX_PICTURE_HEIGHT_SIZE + txb_origin_x - txb_origin_y];
1453 
1454             mode = blk_ptr->pred_mode;
1455 
1456             // Hsan: if CHROMA_MODE_2, then CFL will be evaluated @ EP as no CHROMA @ MD
1457             // If that's the case then you should ensure than the 1st chroma prediction uses UV_DC_PRED (that's the default configuration for CHROMA_MODE_2 if CFL applicable (set @ fast loop candidates injection) then MD assumes chroma mode always UV_DC_PRED)
1458             svt_av1_predict_intra_block(
1459                 ED_STAGE,
1460                 context_ptr->blk_geom,
1461                 blk_ptr->av1xd,
1462                 context_ptr->blk_geom->bwidth,
1463                 context_ptr->blk_geom->bheight,
1464                 tx_size,
1465                 mode,
1466                 pu_ptr->angle_delta[PLANE_TYPE_Y],
1467                 blk_ptr->palette_info.pmi.palette_size[0] > 0,
1468                 &blk_ptr->palette_info,
1469                 blk_ptr->filter_intra_mode,
1470                 top_neigh_array + 1,
1471                 left_neigh_array + 1,
1472                 recon_buffer,
1473                 (context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1474                  context_ptr->blk_geom->origin_x) >>
1475                     2,
1476                 (context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1477                  context_ptr->blk_geom->origin_y) >>
1478                     2,
1479                 0,
1480                 context_ptr->blk_geom->bsize,
1481                 txb_origin_x,
1482                 txb_origin_y,
1483                 context_ptr->blk_origin_x,
1484                 context_ptr->blk_origin_y,
1485                 0,
1486                 0,
1487                 &((SequenceControlSet *)pcs_ptr->scs_wrapper_ptr->object_ptr)->seq_header );
1488         }
1489         // Encode Transform Unit -INTRA-
1490         av1_encode_loop_func_table[is_16bit](pcs_ptr,
1491                                              context_ptr,
1492                                              sb_ptr,
1493                                              txb_origin_x,
1494                                              txb_origin_y,
1495                                              recon_buffer,
1496                                              coeff_buffer_sb,
1497                                              residual_buffer,
1498                                              transform_buffer,
1499                                              inverse_quant_buffer,
1500                                              count_non_zero_coeffs,
1501                                              PICTURE_BUFFER_DESC_LUMA_MASK,
1502                                             eobs[context_ptr->txb_itr]);
1503 
1504         if (pcs_ptr->cdf_ctrl.update_coef) {
1505             ModeDecisionCandidateBuffer **candidate_buffer_ptr_array_base =
1506                 context_ptr->md_context->candidate_buffer_ptr_array;
1507             ModeDecisionCandidateBuffer **candidate_buffer_ptr_array =
1508                 &(candidate_buffer_ptr_array_base[0]);
1509             ModeDecisionCandidateBuffer *candidate_buffer;
1510 
1511             // Set the Candidate Buffer
1512             candidate_buffer = candidate_buffer_ptr_array[0];
1513             // Rate estimation function uses the values from CandidatePtr. The right values are copied from blk_ptr to CandidatePtr
1514             candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr] =
1515                 blk_ptr->txb_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_Y];
1516             candidate_buffer->candidate_ptr->transform_type_uv =
1517                 blk_ptr->txb_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_UV];
1518             candidate_buffer->candidate_ptr->type              = blk_ptr->prediction_mode_flag;
1519             candidate_buffer->candidate_ptr->pred_mode         = blk_ptr->pred_mode;
1520             candidate_buffer->candidate_ptr->filter_intra_mode = blk_ptr->filter_intra_mode;
1521             const uint32_t coeff1d_offset                      = context_ptr->coded_area_sb;
1522 
1523             av1_txb_estimate_coeff_bits(
1524                 context_ptr->md_context,
1525                 1, //allow_update_cdf,
1526                 &pcs_ptr->ec_ctx_array[sb_addr],
1527                 pcs_ptr,
1528                 candidate_buffer,
1529                 coeff1d_offset,
1530                 context_ptr->coded_area_sb_uv,
1531                 coeff_buffer_sb,
1532                 eobs[context_ptr->txb_itr][0],
1533                 eobs[context_ptr->txb_itr][1],
1534                 eobs[context_ptr->txb_itr][2],
1535                 &y_txb_coeff_bits,
1536                 &cb_txb_coeff_bits,
1537                 &cr_txb_coeff_bits,
1538                 context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
1539                 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1540                 candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr],
1541                 candidate_buffer->candidate_ptr->transform_type_uv,
1542                 COMPONENT_LUMA);
1543         }
1544 
1545         av1_enc_gen_recon_func_ptr[is_16bit](context_ptr,
1546                                              txb_origin_x,
1547                                              txb_origin_y,
1548                                              recon_buffer,
1549                                              inverse_quant_buffer,
1550                                              PICTURE_BUFFER_DESC_LUMA_MASK,
1551                                              eobs[context_ptr->txb_itr]);
1552 
1553         // Update Recon Samples-INTRA-
1554         encode_pass_update_recon_sample_neighbour_arrays(
1555             ep_luma_recon_neighbor_array,
1556             ep_cb_recon_neighbor_array,
1557             ep_cr_recon_neighbor_array,
1558             recon_buffer,
1559             txb_origin_x,
1560             txb_origin_y,
1561             context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
1562             context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr],
1563             context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1564             context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1565             PICTURE_BUFFER_DESC_LUMA_MASK,
1566             is_16bit);
1567 
1568         context_ptr->coded_area_sb +=
1569             context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr] *
1570             context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr];
1571 
1572         // Update the luma Dc Sign Level Coeff Neighbor Array
1573         {
1574             uint8_t dc_sign_level_coeff = (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[0][context_ptr->txb_itr];
1575             neighbor_array_unit_mode_write(
1576                 pcs_ptr->ep_luma_dc_sign_level_coeff_neighbor_array[tile_idx],
1577                 (uint8_t *)&dc_sign_level_coeff,
1578                 txb_origin_x,
1579                 txb_origin_y,
1580                 context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
1581                 context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr],
1582                 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
1583         }
1584 
1585     } // Transform Loop
1586 
1587     // Chroma path
1588 
1589     if (context_ptr->blk_geom->has_uv) {
1590         context_ptr->txb_itr = 0;
1591         uint16_t txb_origin_x =
1592             context_ptr->blk_origin_x +
1593             context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1594             context_ptr->blk_geom->origin_x;
1595         uint16_t txb_origin_y =
1596             context_ptr->blk_origin_y +
1597             context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1598             context_ptr->blk_geom->origin_y;
1599         uint32_t blk_originx_uv = (context_ptr->blk_origin_x >> 3 << 3) >> 1;
1600         uint32_t blk_originy_uv = (context_ptr->blk_origin_y >> 3 << 3) >> 1;
1601 
1602         context_ptr->md_context->cb_txb_skip_context = 0;
1603         context_ptr->md_context->cb_dc_sign_context  = 0;
1604         get_txb_ctx(pcs_ptr,
1605                     COMPONENT_CHROMA,
1606                     pcs_ptr->ep_cb_dc_sign_level_coeff_neighbor_array[tile_idx],
1607                     blk_originx_uv,
1608                     blk_originy_uv,
1609                     context_ptr->blk_geom->bsize_uv,
1610                     context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1611                     &context_ptr->md_context->cb_txb_skip_context,
1612                     &context_ptr->md_context->cb_dc_sign_context);
1613 
1614         context_ptr->md_context->cr_txb_skip_context = 0;
1615         context_ptr->md_context->cr_dc_sign_context  = 0;
1616         get_txb_ctx(pcs_ptr,
1617             COMPONENT_CHROMA,
1618             pcs_ptr->ep_cr_dc_sign_level_coeff_neighbor_array[tile_idx],
1619             blk_originx_uv,
1620             blk_originy_uv,
1621             context_ptr->blk_geom->bsize_uv,
1622             context_ptr->blk_geom->txsize_uv[context_ptr->blk_ptr->tx_depth][context_ptr->txb_itr],
1623             &context_ptr->md_context->cr_txb_skip_context,
1624             &context_ptr->md_context->cr_dc_sign_context);
1625 
1626         if (is_16bit) {
1627             uint16_t       top_neigh_array[64 * 2 + 1];
1628             uint16_t       left_neigh_array[64 * 2 + 1];
1629             PredictionMode mode;
1630 
1631             int32_t plane_end = 2;
1632 
1633             for (int32_t plane = 1; plane <= plane_end; ++plane) {
1634                 TxSize tx_size =
1635                     plane
1636                         ? context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr]
1637                         : context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr];
1638 
1639                 if (plane == 1) {
1640                     if (blk_originy_uv != 0)
1641                         svt_memcpy(top_neigh_array + 1,
1642                                (uint16_t *)(ep_cb_recon_neighbor_array->top_array) + blk_originx_uv,
1643                                context_ptr->blk_geom->bwidth_uv * 2 * sizeof(uint16_t));
1644                     if (blk_originx_uv != 0)
1645                         svt_memcpy(left_neigh_array + 1,
1646                                (uint16_t *)(ep_cb_recon_neighbor_array->left_array) + blk_originy_uv,
1647                                context_ptr->blk_geom->bheight_uv * 2 * sizeof(uint16_t));
1648                     if (blk_originy_uv != 0 && blk_originx_uv != 0)
1649                         top_neigh_array[0] = left_neigh_array[0] =
1650                             ((uint16_t *)(ep_cb_recon_neighbor_array->top_left_array) +
1651                              MAX_PICTURE_HEIGHT_SIZE / 2 + blk_originx_uv - blk_originy_uv)[0];
1652                 } else if (plane == 2) {
1653                     if (blk_originy_uv != 0)
1654                         svt_memcpy(top_neigh_array + 1,
1655                                (uint16_t *)(ep_cr_recon_neighbor_array->top_array) + blk_originx_uv,
1656                                context_ptr->blk_geom->bwidth_uv * 2 * sizeof(uint16_t));
1657                     if (blk_originx_uv != 0)
1658                         svt_memcpy(left_neigh_array + 1,
1659                                (uint16_t *)(ep_cr_recon_neighbor_array->left_array) + blk_originy_uv,
1660                                context_ptr->blk_geom->bheight_uv * 2 * sizeof(uint16_t));
1661                     if (blk_originy_uv != 0 && blk_originx_uv != 0)
1662                         top_neigh_array[0] = left_neigh_array[0] =
1663                             ((uint16_t *)(ep_cr_recon_neighbor_array->top_left_array) +
1664                              MAX_PICTURE_HEIGHT_SIZE / 2 + blk_originx_uv - blk_originy_uv)[0];
1665                 }
1666 
1667                 mode = (pu_ptr->intra_chroma_mode == UV_CFL_PRED)
1668                            ? (PredictionMode)UV_DC_PRED
1669                            : (PredictionMode)pu_ptr->intra_chroma_mode;
1670 
1671                 svt_av1_predict_intra_block_16bit(
1672                     bit_depth,
1673                     ED_STAGE,
1674                     context_ptr->blk_geom,
1675                     context_ptr->blk_ptr->av1xd,
1676                     plane ? context_ptr->blk_geom->bwidth_uv : context_ptr->blk_geom->bwidth,
1677                     plane ? context_ptr->blk_geom->bheight_uv : context_ptr->blk_geom->bheight,
1678                     tx_size,
1679                     mode,
1680                     plane ? pu_ptr->angle_delta[PLANE_TYPE_UV] : pu_ptr->angle_delta[PLANE_TYPE_Y],
1681                     0, //chroma
1682                     &blk_ptr->palette_info,
1683                     FILTER_INTRA_MODES,
1684                     top_neigh_array + 1,
1685                     left_neigh_array + 1,
1686                     recon_buffer,
1687                     plane ? 0
1688                           : (context_ptr->blk_geom
1689                                  ->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1690                              context_ptr->blk_geom->origin_x) >>
1691                             2,
1692                     plane ? 0
1693                           : (context_ptr->blk_geom
1694                                  ->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1695                              context_ptr->blk_geom->origin_y) >>
1696                             2,
1697                     plane,
1698                     context_ptr->blk_geom->bsize,
1699                     txb_origin_x,
1700                     txb_origin_y,
1701                     context_ptr->blk_origin_x,
1702                     context_ptr->blk_origin_y,
1703                     0,
1704                     0,
1705                     &((SequenceControlSet *)pcs_ptr->scs_wrapper_ptr->object_ptr)->seq_header);
1706             }
1707         } else {
1708             uint8_t        top_neigh_array[64 * 2 + 1];
1709             uint8_t        left_neigh_array[64 * 2 + 1];
1710             PredictionMode mode;
1711 
1712             // Partition Loop
1713             int32_t plane_end = 2;
1714 
1715             for (int32_t plane = 1; plane <= plane_end; ++plane) {
1716                 TxSize tx_size =
1717                     plane
1718                         ? context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr]
1719                         : context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr];
1720 
1721                 if (plane == 1) {
1722                     if (blk_originy_uv != 0)
1723                         svt_memcpy(top_neigh_array + 1,
1724                                ep_cb_recon_neighbor_array->top_array + blk_originx_uv,
1725                                context_ptr->blk_geom->bwidth_uv * 2);
1726 
1727                     if (blk_originx_uv != 0)
1728                         svt_memcpy(left_neigh_array + 1,
1729                                ep_cb_recon_neighbor_array->left_array + blk_originy_uv,
1730                                context_ptr->blk_geom->bheight_uv * 2);
1731 
1732                     if (blk_originy_uv != 0 && blk_originx_uv != 0)
1733                         top_neigh_array[0] = left_neigh_array[0] =
1734                             ep_cb_recon_neighbor_array
1735                                 ->top_left_array[MAX_PICTURE_HEIGHT_SIZE / 2 + blk_originx_uv -
1736                                                  blk_originy_uv];
1737                 } else {
1738                     if (blk_originy_uv != 0)
1739                         svt_memcpy(top_neigh_array + 1,
1740                                ep_cr_recon_neighbor_array->top_array + blk_originx_uv,
1741                                context_ptr->blk_geom->bwidth_uv * 2);
1742 
1743                     if (blk_originx_uv != 0)
1744                         svt_memcpy(left_neigh_array + 1,
1745                                ep_cr_recon_neighbor_array->left_array + blk_originy_uv,
1746                                context_ptr->blk_geom->bheight_uv * 2);
1747 
1748                     if (blk_originy_uv != 0 && blk_originx_uv != 0)
1749                         top_neigh_array[0] = left_neigh_array[0] =
1750                             ep_cr_recon_neighbor_array
1751                                 ->top_left_array[MAX_PICTURE_HEIGHT_SIZE / 2 + blk_originx_uv -
1752                                                  blk_originy_uv];
1753                 }
1754 
1755                 mode = (pu_ptr->intra_chroma_mode == UV_CFL_PRED)
1756                            ? (PredictionMode)UV_DC_PRED
1757                            : (PredictionMode)pu_ptr->intra_chroma_mode;
1758 
1759                 // Hsan: if CHROMA_MODE_2, then CFL will be evaluated @ EP as no CHROMA @ MD
1760                 // If that's the case then you should ensure than the 1st chroma prediction uses UV_DC_PRED (that's the default configuration for CHROMA_MODE_2 if CFL applicable (set @ fast loop candidates injection) then MD assumes chroma mode always UV_DC_PRED)
1761                 svt_av1_predict_intra_block(
1762                     ED_STAGE,
1763                     context_ptr->blk_geom,
1764                     blk_ptr->av1xd,
1765                     plane ? context_ptr->blk_geom->bwidth_uv : context_ptr->blk_geom->bwidth,
1766                     plane ? context_ptr->blk_geom->bheight_uv : context_ptr->blk_geom->bheight,
1767                     tx_size,
1768                     mode,
1769                     plane ? pu_ptr->angle_delta[PLANE_TYPE_UV] : pu_ptr->angle_delta[PLANE_TYPE_Y],
1770                     0, //chroma
1771                     &blk_ptr->palette_info,
1772                     FILTER_INTRA_MODES,
1773                     top_neigh_array + 1,
1774                     left_neigh_array + 1,
1775                     recon_buffer,
1776                     plane ? 0
1777                           : (context_ptr->blk_geom
1778                                  ->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1779                              context_ptr->blk_geom->origin_x) >>
1780                             2,
1781                     plane ? 0
1782                           : (context_ptr->blk_geom
1783                                  ->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
1784                              context_ptr->blk_geom->origin_y) >>
1785                             2,
1786                     plane,
1787                     context_ptr->blk_geom->bsize,
1788                     txb_origin_x,
1789                     txb_origin_y,
1790                     context_ptr->blk_origin_x,
1791                     context_ptr->blk_origin_y,
1792                     0,
1793                     0,
1794                     &((SequenceControlSet *)pcs_ptr->scs_wrapper_ptr->object_ptr)->seq_header);
1795             }
1796         }
1797 
1798         // Encode Transform Unit -INTRA-
1799 
1800         av1_encode_loop_func_table[is_16bit](pcs_ptr,
1801                                              context_ptr,
1802                                              sb_ptr,
1803                                              txb_origin_x,
1804                                              txb_origin_y,
1805                                              recon_buffer,
1806                                              coeff_buffer_sb,
1807                                              residual_buffer,
1808                                              transform_buffer,
1809                                              inverse_quant_buffer,
1810                                              count_non_zero_coeffs,
1811                                              PICTURE_BUFFER_DESC_CHROMA_MASK,
1812                                              eobs[context_ptr->txb_itr]);
1813 
1814         if (pcs_ptr->cdf_ctrl.update_coef) {
1815             ModeDecisionCandidateBuffer **candidate_buffer_ptr_array_base =
1816                 context_ptr->md_context->candidate_buffer_ptr_array;
1817             ModeDecisionCandidateBuffer **candidate_buffer_ptr_array =
1818                 &(candidate_buffer_ptr_array_base[0]);
1819             ModeDecisionCandidateBuffer *candidate_buffer;
1820 
1821             // Set the Candidate Buffer
1822             candidate_buffer = candidate_buffer_ptr_array[0];
1823             // Rate estimation function uses the values from CandidatePtr. The right values are copied from blk_ptr to CandidatePtr
1824             candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr] =
1825                 blk_ptr->txb_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_Y];
1826             candidate_buffer->candidate_ptr->transform_type_uv =
1827                 blk_ptr->txb_array[context_ptr->txb_itr].transform_type[PLANE_TYPE_UV];
1828             candidate_buffer->candidate_ptr->type              = blk_ptr->prediction_mode_flag;
1829             candidate_buffer->candidate_ptr->pred_mode         = blk_ptr->pred_mode;
1830             candidate_buffer->candidate_ptr->filter_intra_mode = blk_ptr->filter_intra_mode;
1831             const uint32_t coeff1d_offset                      = context_ptr->coded_area_sb;
1832 
1833             av1_txb_estimate_coeff_bits(
1834                 context_ptr->md_context,
1835                 1, //allow_update_cdf,
1836                 &pcs_ptr->ec_ctx_array[sb_addr],
1837                 pcs_ptr,
1838                 candidate_buffer,
1839                 coeff1d_offset,
1840                 context_ptr->coded_area_sb_uv,
1841                 coeff_buffer_sb,
1842                 eobs[context_ptr->txb_itr][0],
1843                 eobs[context_ptr->txb_itr][1],
1844                 eobs[context_ptr->txb_itr][2],
1845                 &y_txb_coeff_bits,
1846                 &cb_txb_coeff_bits,
1847                 &cr_txb_coeff_bits,
1848                 context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
1849                 context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1850                 candidate_buffer->candidate_ptr->transform_type[context_ptr->txb_itr],
1851                 candidate_buffer->candidate_ptr->transform_type_uv,
1852                 COMPONENT_CHROMA);
1853         }
1854 
1855         av1_enc_gen_recon_func_ptr[is_16bit](context_ptr,
1856                                              txb_origin_x,
1857                                              txb_origin_y,
1858                                              recon_buffer,
1859                                              inverse_quant_buffer,
1860                                              PICTURE_BUFFER_DESC_CHROMA_MASK,
1861                                              eobs[context_ptr->txb_itr]);
1862 
1863         // Update Recon Samples-INTRA-
1864         encode_pass_update_recon_sample_neighbour_arrays(
1865             ep_luma_recon_neighbor_array,
1866             ep_cb_recon_neighbor_array,
1867             ep_cr_recon_neighbor_array,
1868             recon_buffer,
1869             txb_origin_x,
1870             txb_origin_y,
1871             context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
1872             context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr],
1873             context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1874             context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1875             PICTURE_BUFFER_DESC_CHROMA_MASK,
1876             is_16bit);
1877 
1878         context_ptr->coded_area_sb_uv +=
1879             context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr] *
1880             context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr];
1881 
1882         // Update the cb Dc Sign Level Coeff Neighbor Array
1883         {
1884             uint8_t dc_sign_level_coeff = (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[1][context_ptr->txb_itr];
1885             neighbor_array_unit_mode_write(
1886                 pcs_ptr->ep_cb_dc_sign_level_coeff_neighbor_array[tile_idx],
1887                 (uint8_t *)&dc_sign_level_coeff,
1888                 ROUND_UV(txb_origin_x) >> 1,
1889                 ROUND_UV(txb_origin_y) >> 1,
1890                 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1891                 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1892                 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
1893         }
1894 
1895         // Update the cr DC Sign Level Coeff Neighbor Array
1896         {
1897             uint8_t dc_sign_level_coeff = (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[2][context_ptr->txb_itr];
1898             neighbor_array_unit_mode_write(
1899                 pcs_ptr->ep_cr_dc_sign_level_coeff_neighbor_array[tile_idx],
1900                 (uint8_t *)&dc_sign_level_coeff,
1901                 ROUND_UV(txb_origin_x) >> 1,
1902                 ROUND_UV(txb_origin_y) >> 1,
1903                 context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1904                 context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
1905                 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
1906         }
1907 
1908     } // Transform Loop
1909     for (context_ptr->txb_itr = 0; context_ptr->txb_itr < tot_tu; context_ptr->txb_itr++) {
1910         uint8_t uv_pass = blk_ptr->tx_depth && context_ptr->txb_itr ? 0 : 1;
1911 
1912         if (context_ptr->blk_geom->has_uv && uv_pass) {
1913             blk_ptr->block_has_coeff = blk_ptr->block_has_coeff |
1914                 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].y_has_coeff[context_ptr->txb_itr] |
1915                 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].u_has_coeff[context_ptr->txb_itr] |
1916                 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].v_has_coeff[context_ptr->txb_itr];
1917 
1918             if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].u_has_coeff[context_ptr->txb_itr])
1919                 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].u_has_coeff[0] = EB_TRUE;
1920             if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].v_has_coeff[context_ptr->txb_itr])
1921                 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].v_has_coeff[0] = EB_TRUE;
1922         }
1923         else {
1924             blk_ptr->block_has_coeff =
1925                 blk_ptr->block_has_coeff | context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].y_has_coeff[context_ptr->txb_itr];
1926         }
1927     } // Transform Loop
1928 }
1929 #define REFMVS_LIMIT ((1 << 12) - 1)
1930 
av1_copy_frame_mvs(PictureControlSet * pcs_ptr,const Av1Common * const cm,MbModeInfo mi,int mi_row,int mi_col,int x_mis,int y_mis,EbReferenceObject * object_ptr)1931 static void av1_copy_frame_mvs(PictureControlSet *pcs_ptr, const Av1Common *const cm, MbModeInfo mi,
1932                                int mi_row, int mi_col, int x_mis, int y_mis,
1933                                EbReferenceObject *object_ptr) {
1934     const int frame_mvs_stride = ROUND_POWER_OF_TWO(cm->mi_cols, 1);
1935     MV_REF *  frame_mvs        = object_ptr->mvs + (mi_row >> 1) * frame_mvs_stride + (mi_col >> 1);
1936     x_mis                      = ROUND_POWER_OF_TWO(x_mis, 1);
1937     y_mis                      = ROUND_POWER_OF_TWO(y_mis, 1);
1938     int w, h;
1939 
1940     for (h = 0; h < y_mis; h++) {
1941         MV_REF *mv = frame_mvs;
1942         for (w = 0; w < x_mis; w++) {
1943             mv->ref_frame = NONE_FRAME;
1944             mv->mv.as_int = 0;
1945 
1946             for (int idx = 0; idx < 2; ++idx) {
1947                 MvReferenceFrame ref_frame = mi.block_mi.ref_frame[idx];
1948                 if (ref_frame > INTRA_FRAME) {
1949                     int8_t ref_idx = pcs_ptr->ref_frame_side[ref_frame];
1950                     if (ref_idx) continue;
1951                     if ((abs(mi.block_mi.mv[idx].as_mv.row) > REFMVS_LIMIT) ||
1952                         (abs(mi.block_mi.mv[idx].as_mv.col) > REFMVS_LIMIT))
1953                         continue;
1954                     mv->ref_frame = ref_frame;
1955                     mv->mv.as_int = mi.block_mi.mv[idx].as_int;
1956                 }
1957             }
1958             mv++;
1959         }
1960         frame_mvs += frame_mvs_stride;
1961     }
1962 }
1963 /*******************************************
1964 * Encode Pass
1965 *
1966 * Summary: Performs an AV1 conformant
1967 *   reconstruction based on the SB
1968 *   mode decision.
1969 *
1970 * Inputs:
1971 *   SourcePic
1972 *   Coding Results
1973 *   SB Location
1974 *   Sequence Control Set
1975 *   Picture Control Set
1976 *
1977 * Outputs:
1978 *   Reconstructed Samples
1979 *   Coefficient Samples
1980 *
1981 *******************************************/
av1_encode_decode(SequenceControlSet * scs_ptr,PictureControlSet * pcs_ptr,SuperBlock * sb_ptr,uint32_t sb_addr,uint32_t sb_origin_x,uint32_t sb_origin_y,EncDecContext * context_ptr)1982 EB_EXTERN void av1_encode_decode(SequenceControlSet *scs_ptr, PictureControlSet *pcs_ptr,
1983                                SuperBlock *sb_ptr, uint32_t sb_addr, uint32_t sb_origin_x,
1984                                uint32_t sb_origin_y, EncDecContext *context_ptr) {
1985     EbBool               is_16bit = context_ptr->is_16bit;
1986     EbPictureBufferDesc *recon_buffer;
1987     EbPictureBufferDesc *coeff_buffer_sb = pcs_ptr->parent_pcs_ptr->enc_dec_ptr->quantized_coeff[sb_addr];
1988     EbPictureBufferDesc *input_picture;
1989     ModeDecisionContext *md_context_ptr;
1990     md_context_ptr = context_ptr->md_context;
1991     input_picture  = context_ptr->input_samples =
1992         (EbPictureBufferDesc *)pcs_ptr->parent_pcs_ptr->enhanced_picture_ptr;
1993     // SB Stats
1994     uint32_t sb_width =
1995         MIN(scs_ptr->sb_size_pix, pcs_ptr->parent_pcs_ptr->aligned_width - sb_origin_x);
1996     uint32_t sb_height =
1997         MIN(scs_ptr->sb_size_pix, pcs_ptr->parent_pcs_ptr->aligned_height - sb_origin_y);
1998     // MV merge mode
1999     uint32_t              y_has_coeff;
2000     uint32_t              u_has_coeff;
2001     uint32_t              v_has_coeff;
2002     uint32_t              count_non_zero_coeffs[3];
2003     uint16_t              eobs[MAX_TXB_COUNT][3];
2004     uint64_t              y_txb_coeff_bits;
2005     uint64_t              cb_txb_coeff_bits;
2006     uint64_t              cr_txb_coeff_bits;
2007     EncodeContext *       encode_context_ptr;
2008     // Dereferencing early
2009     uint16_t tile_idx = context_ptr->tile_index;
2010     uint16_t total_tile_cnt = pcs_ptr->parent_pcs_ptr->av1_cm->tiles_info.tile_cols *
2011         pcs_ptr->parent_pcs_ptr->av1_cm->tiles_info.tile_rows;
2012     NeighborArrayUnit *ep_mode_type_neighbor_array = pcs_ptr->ep_mode_type_neighbor_array[tile_idx];
2013     NeighborArrayUnit *ep_intra_luma_mode_neighbor_array =
2014         pcs_ptr->ep_intra_luma_mode_neighbor_array[tile_idx];
2015     NeighborArrayUnit *ep_intra_chroma_mode_neighbor_array =
2016         pcs_ptr->ep_intra_chroma_mode_neighbor_array[tile_idx];
2017     NeighborArrayUnit *ep_mv_neighbor_array = pcs_ptr->ep_mv_neighbor_array[tile_idx];
2018     NeighborArrayUnit *ep_luma_recon_neighbor_array =
2019         is_16bit ? pcs_ptr->ep_luma_recon_neighbor_array16bit[tile_idx]
2020                  : pcs_ptr->ep_luma_recon_neighbor_array[tile_idx];
2021     NeighborArrayUnit *ep_cb_recon_neighbor_array =
2022         is_16bit ? pcs_ptr->ep_cb_recon_neighbor_array16bit[tile_idx]
2023                  : pcs_ptr->ep_cb_recon_neighbor_array[tile_idx];
2024     NeighborArrayUnit *ep_cr_recon_neighbor_array =
2025         is_16bit ? pcs_ptr->ep_cr_recon_neighbor_array16bit[tile_idx]
2026                  : pcs_ptr->ep_cr_recon_neighbor_array[tile_idx];
2027     NeighborArrayUnit *ep_skip_flag_neighbor_array = pcs_ptr->ep_skip_flag_neighbor_array[tile_idx];
2028 
2029     EbBool       dlf_enable_flag = (EbBool)pcs_ptr->parent_pcs_ptr->loop_filter_mode;
2030     encode_context_ptr =
2031         ((SequenceControlSet *)(pcs_ptr->scs_wrapper_ptr->object_ptr))->encode_context_ptr;
2032 
2033     if (pcs_ptr->parent_pcs_ptr->is_used_as_reference_flag == EB_TRUE)
2034         //get the 16bit form of the input SB
2035         if (is_16bit)
2036             recon_buffer = ((EbReferenceObject *)
2037                                 pcs_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)
2038                                ->reference_picture16bit;
2039         else
2040             recon_buffer = ((EbReferenceObject *)
2041                                 pcs_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)
2042                                ->reference_picture;
2043     else // non ref pictures
2044         recon_buffer = is_16bit ? pcs_ptr->parent_pcs_ptr->enc_dec_ptr->recon_picture16bit_ptr : pcs_ptr->parent_pcs_ptr->enc_dec_ptr->recon_picture_ptr;
2045     if (is_16bit && scs_ptr->static_config.encoder_bit_depth > EB_8BIT) {
2046         //SB128_TODO change 10bit SB creation
2047 
2048         if ((scs_ptr->static_config.ten_bit_format == 1) ||
2049             (scs_ptr->static_config.compressed_ten_bit_format == 1)) {
2050             const uint32_t input_luma_offset =
2051                 ((sb_origin_y + input_picture->origin_y) * input_picture->stride_y) +
2052                 (sb_origin_x + input_picture->origin_x);
2053             const uint32_t input_cb_offset =
2054                 (((sb_origin_y + input_picture->origin_y) >> 1) * input_picture->stride_cb) +
2055                 ((sb_origin_x + input_picture->origin_x) >> 1);
2056             const uint32_t input_cr_offset =
2057                 (((sb_origin_y + input_picture->origin_y) >> 1) * input_picture->stride_cr) +
2058                 ((sb_origin_x + input_picture->origin_x) >> 1);
2059             const uint16_t luma_2bit_width = input_picture->width / 4;
2060             const uint16_t chroma_2bit_width = input_picture->width / 8;
2061 
2062             compressed_pack_sb(input_picture->buffer_y + input_luma_offset,
2063                                input_picture->stride_y,
2064                                input_picture->buffer_bit_inc_y + sb_origin_y * luma_2bit_width +
2065                                    (sb_origin_x / 4) * sb_height,
2066                                sb_width / 4,
2067                                (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_y,
2068                                context_ptr->input_sample16bit_buffer->stride_y,
2069                                sb_width,
2070                                sb_height);
2071 
2072             compressed_pack_sb(input_picture->buffer_cb + input_cb_offset,
2073                                input_picture->stride_cb,
2074                                input_picture->buffer_bit_inc_cb +
2075                                    sb_origin_y / 2 * chroma_2bit_width +
2076                                    (sb_origin_x / 8) * (sb_height / 2),
2077                                sb_width / 8,
2078                                (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cb,
2079                                context_ptr->input_sample16bit_buffer->stride_cb,
2080                                sb_width >> 1,
2081                                sb_height >> 1);
2082 
2083             compressed_pack_sb(input_picture->buffer_cr + input_cr_offset,
2084                                input_picture->stride_cr,
2085                                input_picture->buffer_bit_inc_cr +
2086                                    sb_origin_y / 2 * chroma_2bit_width +
2087                                    (sb_origin_x / 8) * (sb_height / 2),
2088                                sb_width / 8,
2089                                (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cr,
2090                                context_ptr->input_sample16bit_buffer->stride_cr,
2091                                sb_width >> 1,
2092                                sb_height >> 1);
2093         } else {
2094             const uint32_t input_luma_offset =
2095                 ((sb_origin_y + input_picture->origin_y) * input_picture->stride_y) +
2096                 (sb_origin_x + input_picture->origin_x);
2097             const uint32_t input_bit_inc_luma_offset =
2098                 ((sb_origin_y + input_picture->origin_y) * input_picture->stride_bit_inc_y) +
2099                 (sb_origin_x + input_picture->origin_x);
2100             const uint32_t input_cb_offset =
2101                 (((sb_origin_y + input_picture->origin_y) >> 1) * input_picture->stride_cb) +
2102                 ((sb_origin_x + input_picture->origin_x) >> 1);
2103             const uint32_t input_bit_inc_cb_offset =
2104                 (((sb_origin_y + input_picture->origin_y) >> 1) *
2105                  input_picture->stride_bit_inc_cb) +
2106                 ((sb_origin_x + input_picture->origin_x) >> 1);
2107             const uint32_t input_cr_offset =
2108                 (((sb_origin_y + input_picture->origin_y) >> 1) * input_picture->stride_cr) +
2109                 ((sb_origin_x + input_picture->origin_x) >> 1);
2110             const uint32_t input_bit_inc_cr_offset = (((sb_origin_y + input_picture->origin_y) >> 1) *
2111                                                   input_picture->stride_bit_inc_cr) +
2112                                                  ((sb_origin_x + input_picture->origin_x) >> 1);
2113 
2114             pack2d_src(input_picture->buffer_y + input_luma_offset,
2115                        input_picture->stride_y,
2116                        input_picture->buffer_bit_inc_y + input_bit_inc_luma_offset,
2117                        input_picture->stride_bit_inc_y,
2118                        (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_y,
2119                        context_ptr->input_sample16bit_buffer->stride_y,
2120                        sb_width,
2121                        sb_height);
2122 
2123             pack2d_src(input_picture->buffer_cb + input_cb_offset,
2124                        input_picture->stride_cr,
2125                        input_picture->buffer_bit_inc_cb + input_bit_inc_cb_offset,
2126                        input_picture->stride_bit_inc_cr,
2127                        (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cb,
2128                        context_ptr->input_sample16bit_buffer->stride_cb,
2129                        sb_width >> 1,
2130                        sb_height >> 1);
2131 
2132             pack2d_src(input_picture->buffer_cr + input_cr_offset,
2133                        input_picture->stride_cr,
2134                        input_picture->buffer_bit_inc_cr + input_bit_inc_cr_offset,
2135                        input_picture->stride_bit_inc_cr,
2136                        (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cr,
2137                        context_ptr->input_sample16bit_buffer->stride_cr,
2138                        sb_width >> 1,
2139                        sb_height >> 1);
2140         // PAD the packed source in incomplete sb up to max SB size
2141         pad_input_picture_16bit(
2142                 (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_y,
2143                 context_ptr->input_sample16bit_buffer->stride_y,
2144                 sb_width,
2145                 sb_height,
2146                 scs_ptr->sb_size_pix - sb_width,
2147                 scs_ptr->sb_size_pix - sb_height);
2148         pad_input_picture_16bit(
2149                 (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cb,
2150                 context_ptr->input_sample16bit_buffer->stride_cb,
2151                 sb_width >> 1,
2152                 sb_height >> 1,
2153                 (scs_ptr->sb_size_pix- sb_width  )>>1,
2154                 (scs_ptr->sb_size_pix - sb_height)>>1);
2155 
2156         pad_input_picture_16bit(
2157                 (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cr,
2158                 context_ptr->input_sample16bit_buffer->stride_cr,
2159                 sb_width >> 1,
2160                 sb_height >> 1,
2161                 (scs_ptr->sb_size_pix - sb_width  )>>1,
2162                 (scs_ptr->sb_size_pix  - sb_height)>>1);
2163         }
2164 
2165         if (context_ptr->md_context->hbd_mode_decision == 0)
2166             store16bit_input_src(context_ptr->input_sample16bit_buffer,
2167                                  pcs_ptr,
2168                                  sb_origin_x,
2169                                  sb_origin_y,
2170                                  scs_ptr->sb_size_pix,
2171                                  scs_ptr->sb_size_pix);
2172     }
2173 
2174     if (is_16bit && scs_ptr->static_config.encoder_bit_depth == EB_8BIT) {
2175         const uint32_t input_luma_offset =
2176             ((sb_origin_y + input_picture->origin_y) * input_picture->stride_y) +
2177             (sb_origin_x + input_picture->origin_x);
2178         const uint32_t input_cb_offset =
2179             (((sb_origin_y + input_picture->origin_y) >> 1) * input_picture->stride_cb) +
2180             ((sb_origin_x + input_picture->origin_x) >> 1);
2181         const uint32_t input_cr_offset =
2182             (((sb_origin_y + input_picture->origin_y) >> 1) * input_picture->stride_cr) +
2183             ((sb_origin_x + input_picture->origin_x) >> 1);
2184 
2185         sb_width =
2186             ((sb_width < MIN_SB_SIZE) || ((sb_width > MIN_SB_SIZE) && (sb_width < MAX_SB_SIZE)))
2187             ? MIN(scs_ptr->sb_size_pix,
2188             (pcs_ptr->parent_pcs_ptr->aligned_width + scs_ptr->right_padding) -
2189                 sb_origin_x)
2190             : sb_width;
2191         sb_height =
2192             ((sb_height < MIN_SB_SIZE) || ((sb_height > MIN_SB_SIZE) && (sb_height < MAX_SB_SIZE)))
2193             ? MIN(scs_ptr->sb_size_pix,
2194             (pcs_ptr->parent_pcs_ptr->aligned_height + scs_ptr->bot_padding) -
2195                 sb_origin_y)
2196             : sb_height;
2197 
2198         // PACK Y
2199         uint16_t *buf_16bit = (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_y;
2200         uint8_t * buf_8bit = input_picture->buffer_y + input_luma_offset;
2201         svt_convert_8bit_to_16bit(buf_8bit,
2202             input_picture->stride_y,
2203             buf_16bit,
2204             context_ptr->input_sample16bit_buffer->stride_y,
2205             sb_width,
2206             sb_height);
2207 
2208         // PACK CB
2209         buf_16bit = (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cb;
2210         buf_8bit = input_picture->buffer_cb + input_cb_offset;
2211         svt_convert_8bit_to_16bit(buf_8bit,
2212             input_picture->stride_cb,
2213             buf_16bit,
2214             context_ptr->input_sample16bit_buffer->stride_cb,
2215             sb_width >> 1,
2216             sb_height >> 1);
2217 
2218         // PACK CR
2219         buf_16bit = (uint16_t *)context_ptr->input_sample16bit_buffer->buffer_cr;
2220         buf_8bit = input_picture->buffer_cr + input_cr_offset;
2221         svt_convert_8bit_to_16bit(buf_8bit,
2222             input_picture->stride_cr,
2223             buf_16bit,
2224             context_ptr->input_sample16bit_buffer->stride_cr,
2225             sb_width >> 1,
2226             sb_height >> 1);
2227     }
2228     context_ptr->coded_area_sb                = 0;
2229     context_ptr->coded_area_sb_uv             = 0;
2230 
2231     if (dlf_enable_flag && pcs_ptr->parent_pcs_ptr->loop_filter_mode == 1 && total_tile_cnt == 1) {
2232         if (sb_addr == 0) {
2233             svt_av1_loop_filter_init(pcs_ptr);
2234 
2235             svt_av1_pick_filter_level(
2236                 (EbPictureBufferDesc *)pcs_ptr->parent_pcs_ptr->enhanced_picture_ptr,
2237                 pcs_ptr,
2238                 LPF_PICK_FROM_Q);
2239 
2240             svt_av1_loop_filter_frame_init(
2241                 &pcs_ptr->parent_pcs_ptr->frm_hdr, &pcs_ptr->parent_pcs_ptr->lf_info, 0, 3);
2242         }
2243     }
2244 
2245     uint32_t final_blk_itr    = 0;
2246     // CU Loop
2247     uint32_t blk_it = 0;
2248     while (blk_it < scs_ptr->max_block_cnt) {
2249         BlkStruct *blk_ptr = context_ptr->blk_ptr =
2250             &context_ptr->md_context->md_blk_arr_nsq[blk_it];
2251         //At the boundary when it's not a complete super block.
2252         //We may only use part of the blocks in MD.
2253         //And the mds_idx of the parent block is not set properly
2254         //And it will generate the wrong cdf ctx and influence the MD for the next SB
2255         blk_ptr->mds_idx = blk_it;
2256         PartitionType part = blk_ptr->part;
2257 
2258         const BlockGeom *blk_geom = context_ptr->blk_geom = get_blk_geom_mds(blk_it);
2259         sb_ptr->cu_partition_array[blk_it] = context_ptr->md_context->md_blk_arr_nsq[blk_it].part;
2260         if (pcs_ptr->cdf_ctrl.update_se) {
2261             blk_ptr->av1xd->tile_ctx = &pcs_ptr->ec_ctx_array[sb_addr];
2262             // Update the partition stats
2263             update_part_stats(pcs_ptr,
2264                               blk_ptr,
2265                               tile_idx,
2266                               (sb_origin_y + blk_geom->origin_y) >> MI_SIZE_LOG2,
2267                               (sb_origin_x + blk_geom->origin_x) >> MI_SIZE_LOG2);
2268         }
2269         if ((use_input_stat(scs_ptr) || scs_ptr->lap_enabled ) &&
2270             blk_it == 0 && sb_origin_x == 0 && blk_geom->origin_x == 0 && sb_origin_y == 0 && blk_geom->origin_y == 0) {
2271             pcs_ptr->parent_pcs_ptr->pcs_total_rate = 0;
2272         }
2273         if (part != PARTITION_SPLIT && pcs_ptr->parent_pcs_ptr->sb_geom[sb_addr].block_is_allowed[blk_it]) {
2274             int32_t offset_d1 = ns_blk_offset[(int32_t)part]; //blk_ptr->best_d1_blk; // TOCKECK
2275             int32_t num_d1_block =
2276                 ns_blk_num[(int32_t)part]; // context_ptr->blk_geom->totns; // TOCKECK
2277 
2278             // for (int32_t d1_itr = blk_it; d1_itr < blk_it + num_d1_block; d1_itr++) {
2279             for (int32_t d1_itr = (int32_t)blk_it + offset_d1;
2280                  d1_itr < (int32_t)blk_it + offset_d1 + num_d1_block;
2281                  d1_itr++) {
2282                 blk_geom = context_ptr->blk_geom = get_blk_geom_mds(d1_itr);
2283 
2284                 // PU Stack variables
2285                 PredictionUnit *     pu_ptr           = (PredictionUnit *)NULL; //  done
2286                 EbPictureBufferDesc *residual_buffer  = context_ptr->residual_buffer;
2287                 EbPictureBufferDesc *transform_buffer = context_ptr->transform_buffer;
2288 
2289                 EbPictureBufferDesc *inverse_quant_buffer = context_ptr->inverse_quant_buffer;
2290 
2291                 blk_ptr = context_ptr->blk_ptr =
2292                     &context_ptr->md_context->md_blk_arr_nsq[d1_itr];
2293 
2294                 context_ptr->blk_origin_x = (uint16_t)(sb_origin_x + blk_geom->origin_x);
2295                 context_ptr->blk_origin_y = (uint16_t)(sb_origin_y + blk_geom->origin_y);
2296                 if (context_ptr->md_context->ep_use_md_skip_decision)
2297                     context_ptr->md_skip_blk = !blk_ptr->block_has_coeff;
2298                 else
2299                     context_ptr->md_skip_blk =
2300                     context_ptr->md_context->blk_skip_decision
2301                     ? ((blk_ptr->prediction_mode_flag == INTRA_MODE || blk_ptr->block_has_coeff)
2302                         ? 0
2303                         : 1)
2304                     : 0;
2305                 blk_ptr->block_has_coeff = 0;
2306 
2307                 // if(pcs_ptr->picture_number==4 && context_ptr->blk_origin_x==0 && context_ptr->blk_origin_y==0)
2308                 //     SVT_LOG("CHEDD");
2309                 uint32_t coded_area_org    = context_ptr->coded_area_sb;
2310                 uint32_t coded_area_org_uv = context_ptr->coded_area_sb_uv;
2311                 // for now, segmentation independent of sharpness/delta QP.
2312                 if (pcs_ptr->parent_pcs_ptr->frm_hdr.segmentation_params.segmentation_enabled) {
2313                     apply_segmentation_based_quantization(blk_geom, pcs_ptr, sb_ptr, blk_ptr);
2314                     sb_ptr->qindex = blk_ptr->qindex;
2315                 } else {
2316                     blk_ptr->qindex = sb_ptr->qindex;
2317                 }
2318                 svt_block_on_mutex(pcs_ptr->parent_pcs_ptr->pcs_total_rate_mutex);
2319                 pcs_ptr->parent_pcs_ptr->pcs_total_rate += blk_ptr->total_rate;
2320                 svt_release_mutex(pcs_ptr->parent_pcs_ptr->pcs_total_rate_mutex);
2321                 if (blk_ptr->prediction_mode_flag == INTRA_MODE) {
2322                     context_ptr->is_inter = blk_ptr->use_intrabc;
2323                     if (scs_ptr->static_config.encoder_bit_depth > EB_8BIT &&
2324                         pcs_ptr->hbd_mode_decision == 0 &&
2325                         blk_ptr->palette_info.pmi.palette_size[0] > 0) {
2326                         // MD was done in 8-bit; scale the palette colors to 10-bit
2327                         for (uint8_t col = 0; col < blk_ptr->palette_info.pmi.palette_size[0];
2328                              col++)
2329                             blk_ptr->palette_info.pmi.palette_colors[col] *= 4;
2330                     }
2331                     // *Note - Transforms are the same size as predictions
2332                     // Partition Loop
2333                     context_ptr->txb_itr = 0;
2334                     // Transform partitioning path (INTRA Luma/Chroma)
2335                     if (blk_ptr->use_intrabc == 0) {
2336                         // Set the PU Loop Variables
2337                         pu_ptr = blk_ptr->prediction_unit_array;
2338 
2339                         perform_intra_coding_loop(
2340                             pcs_ptr, sb_ptr, sb_addr, blk_ptr, pu_ptr, context_ptr);
2341 
2342                         // Update the Intra-specific Neighbor Arrays
2343                         encode_pass_update_intra_mode_neighbor_arrays(
2344                             ep_mode_type_neighbor_array,
2345                             ep_intra_luma_mode_neighbor_array,
2346                             ep_intra_chroma_mode_neighbor_array,
2347                             (uint8_t)blk_ptr->pred_mode,
2348                             (uint8_t)pu_ptr->intra_chroma_mode,
2349                             context_ptr->blk_origin_x,
2350                             context_ptr->blk_origin_y,
2351                             context_ptr->blk_geom->bwidth,
2352                             context_ptr->blk_geom->bheight,
2353                             context_ptr->blk_geom->bwidth_uv,
2354                             context_ptr->blk_geom->bheight_uv,
2355                             blk_geom->has_uv ? PICTURE_BUFFER_DESC_FULL_MASK
2356                                              : PICTURE_BUFFER_DESC_LUMA_MASK);
2357 
2358                     }
2359                     // Transform partitioning free path (except the 128x128 case)
2360                     else {
2361                         // Set the PU Loop Variables
2362                         pu_ptr = blk_ptr->prediction_unit_array;
2363 
2364                             {
2365                                 // Keep the final useful mvp for entropy coding
2366                                 svt_memcpy(blk_ptr->av1xd->final_ref_mv_stack,
2367                                        context_ptr->md_context
2368                                            ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
2369                                            .ed_ref_mv_stack[blk_ptr->prediction_unit_array[0]
2370                                                                 .ref_frame_type],
2371                                        sizeof(CandidateMv) * MAX_REF_MV_STACK_SIZE);
2372                                 {
2373                                     uint8_t      ref_frame_type = blk_ptr->prediction_unit_array[0].ref_frame_type;
2374                                     MacroBlockD *xd = blk_ptr->av1xd;
2375                                     if (blk_ptr->pred_mode == NEWMV || blk_ptr->pred_mode == NEW_NEWMV) {
2376                                         int32_t idx;
2377                                         for (idx = 0; idx < 2; ++idx) {
2378                                             if (xd->ref_mv_count[ref_frame_type] > idx + 1)
2379                                                 blk_ptr->drl_ctx[idx] = av1_drl_ctx(xd->final_ref_mv_stack, idx);
2380                                             else
2381                                                 blk_ptr->drl_ctx[idx] = -1;
2382                                         }
2383                                     }
2384 
2385                                     if (have_nearmv_in_inter_mode(blk_ptr->pred_mode)) {
2386                                         int32_t idx;
2387                                         // TODO(jingning): Temporary solution to compensate the NEARESTMV offset.
2388                                         for (idx = 1; idx < 3; ++idx) {
2389                                             if (xd->ref_mv_count[ref_frame_type] > idx + 1)
2390                                                 blk_ptr->drl_ctx_near[idx - 1] = av1_drl_ctx(xd->final_ref_mv_stack, idx);
2391                                             else
2392                                                 blk_ptr->drl_ctx_near[idx - 1] = -1;
2393                                         }
2394                                     }
2395                                 }
2396 
2397 
2398                                 // Set MvUnit
2399                                 context_ptr->mv_unit.pred_direction =
2400                                     (uint8_t)pu_ptr->inter_pred_direction_index;
2401                                 context_ptr->mv_unit.mv[REF_LIST_0].mv_union =
2402                                     pu_ptr->mv[REF_LIST_0].mv_union;
2403                                 context_ptr->mv_unit.mv[REF_LIST_1].mv_union =
2404                                     pu_ptr->mv[REF_LIST_1].mv_union;
2405 
2406                                 EbPictureBufferDesc *ref_pic_list0 =
2407                                     ((EbReferenceObject *)pcs_ptr->parent_pcs_ptr
2408                                          ->reference_picture_wrapper_ptr->object_ptr)
2409                                         ->reference_picture;
2410 
2411                                 if (is_16bit)
2412                                     ref_pic_list0 =
2413                                         ((EbReferenceObject *)pcs_ptr->parent_pcs_ptr
2414                                              ->reference_picture_wrapper_ptr->object_ptr)
2415                                             ->reference_picture16bit;
2416 
2417                                 if (is_16bit && !(scs_ptr->static_config.superres_mode > SUPERRES_NONE)) {
2418                                     av1_inter_prediction_16bit_pipeline(
2419                                         pcs_ptr,
2420                                         blk_ptr->interp_filters,
2421                                         blk_ptr,
2422                                         blk_ptr->prediction_unit_array->ref_frame_type,
2423                                         &context_ptr->mv_unit,
2424                                         1, // use_intrabc,
2425                                         SIMPLE_TRANSLATION,
2426                                         0,
2427                                         0,
2428                                         1,
2429                                         &blk_ptr->interinter_comp,
2430                                         ep_luma_recon_neighbor_array,
2431                                         ep_cb_recon_neighbor_array,
2432                                         ep_cr_recon_neighbor_array,
2433                                         blk_ptr->is_interintra_used,
2434                                         blk_ptr->interintra_mode,
2435                                         blk_ptr->use_wedge_interintra,
2436                                         blk_ptr->interintra_wedge_index,
2437                                         context_ptr->blk_origin_x,
2438                                         context_ptr->blk_origin_y,
2439                                         blk_geom->bwidth,
2440                                         blk_geom->bheight,
2441                                         ref_pic_list0,
2442                                         0,
2443                                         recon_buffer,
2444                                         context_ptr->blk_origin_x,
2445                                         context_ptr->blk_origin_y,
2446                                         EB_TRUE,
2447                                         (uint8_t)scs_ptr->static_config.encoder_bit_depth);
2448                                 } else {
2449                                     av1_inter_prediction(
2450                                         scs_ptr,
2451                                         pcs_ptr,
2452                                         blk_ptr->interp_filters,
2453                                         blk_ptr,
2454                                         blk_ptr->prediction_unit_array->ref_frame_type,
2455                                         &context_ptr->mv_unit,
2456                                         1, // use_intrabc,
2457                                         SIMPLE_TRANSLATION,
2458                                         0,
2459                                         0,
2460                                         1,
2461                                         &blk_ptr->interinter_comp,
2462                                         ep_luma_recon_neighbor_array,
2463                                         ep_cb_recon_neighbor_array,
2464                                         ep_cr_recon_neighbor_array,
2465                                         blk_ptr->is_interintra_used,
2466                                         blk_ptr->interintra_mode,
2467                                         blk_ptr->use_wedge_interintra,
2468                                         blk_ptr->interintra_wedge_index,
2469                                         context_ptr->blk_origin_x,
2470                                         context_ptr->blk_origin_y,
2471                                         blk_geom->bwidth,
2472                                         blk_geom->bheight,
2473                                         ref_pic_list0,
2474                                         0,
2475                                         recon_buffer,
2476                                         context_ptr->blk_origin_x,
2477                                         context_ptr->blk_origin_y,
2478                                         EB_TRUE,
2479                                         (uint8_t)scs_ptr->static_config.encoder_bit_depth);
2480                                 }
2481                             }
2482                             // Initialize the Transform Loop
2483 
2484                             context_ptr->txb_itr = 0;
2485                             y_has_coeff = 0;
2486                             u_has_coeff = 0;
2487                             v_has_coeff = 0;
2488 
2489                             uint32_t totTu = context_ptr->blk_geom->txb_count[blk_ptr->tx_depth];
2490 
2491                             for (uint8_t tuIt = 0; tuIt < totTu; tuIt++) {
2492                                 context_ptr->txb_itr = tuIt;
2493                                 uint8_t uv_pass = blk_ptr->tx_depth && tuIt ? 0 : 1;
2494 
2495                                 uint16_t txb_origin_x = context_ptr->blk_origin_x + context_ptr->blk_geom->tx_org_x[1][blk_ptr->tx_depth][tuIt] - context_ptr->blk_geom->origin_x;
2496                                 uint16_t txb_origin_y = context_ptr->blk_origin_y + context_ptr->blk_geom->tx_org_y[1][blk_ptr->tx_depth][tuIt] - context_ptr->blk_geom->origin_y;
2497 
2498                                 context_ptr->md_context->luma_txb_skip_context = 0;
2499                                 context_ptr->md_context->luma_dc_sign_context = 0;
2500                                 get_txb_ctx(
2501                                     pcs_ptr,
2502                                     COMPONENT_LUMA,
2503                                     pcs_ptr->ep_luma_dc_sign_level_coeff_neighbor_array[tile_idx],
2504                                     txb_origin_x,
2505                                     txb_origin_y,
2506                                     context_ptr->blk_geom->bsize,
2507                                     context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
2508                                     &context_ptr->md_context->luma_txb_skip_context,
2509                                     &context_ptr->md_context->luma_dc_sign_context);
2510 
2511 
2512                                 if (context_ptr->blk_geom->has_uv && uv_pass) {
2513                                     context_ptr->md_context->cb_txb_skip_context = 0;
2514                                     context_ptr->md_context->cb_dc_sign_context = 0;
2515                                     get_txb_ctx(
2516                                         pcs_ptr,
2517                                         COMPONENT_CHROMA,
2518                                         pcs_ptr->ep_cb_dc_sign_level_coeff_neighbor_array[tile_idx],
2519                                         ROUND_UV(txb_origin_x) >> 1,
2520                                         ROUND_UV(txb_origin_y) >> 1,
2521                                         context_ptr->blk_geom->bsize_uv,
2522                                         context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
2523                                         &context_ptr->md_context->cb_txb_skip_context,
2524                                         &context_ptr->md_context->cb_dc_sign_context);
2525 
2526                                     context_ptr->md_context->cr_txb_skip_context = 0;
2527                                     context_ptr->md_context->cr_dc_sign_context = 0;
2528                                     get_txb_ctx(
2529                                         pcs_ptr,
2530                                         COMPONENT_CHROMA,
2531                                         pcs_ptr->ep_cr_dc_sign_level_coeff_neighbor_array[tile_idx],
2532                                         ROUND_UV(txb_origin_x) >> 1,
2533                                         ROUND_UV(txb_origin_y) >> 1,
2534                                         context_ptr->blk_geom->bsize_uv,
2535                                         context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
2536                                         &context_ptr->md_context->cr_txb_skip_context,
2537                                         &context_ptr->md_context->cr_dc_sign_context);
2538                                 }
2539                             // Encode Transform Unit -INTRA-
2540                                 {
2541 
2542                                 av1_encode_loop_func_table[is_16bit](
2543                                     pcs_ptr,
2544                                     context_ptr,
2545                                     sb_ptr,
2546                                     txb_origin_x,
2547                                     txb_origin_y,
2548                                     recon_buffer,
2549                                     coeff_buffer_sb,
2550                                     residual_buffer,
2551                                     transform_buffer,
2552                                     inverse_quant_buffer,
2553                                     count_non_zero_coeffs,
2554                                     (context_ptr->blk_geom->has_uv && uv_pass) ? PICTURE_BUFFER_DESC_FULL_MASK :
2555                                                                                  PICTURE_BUFFER_DESC_LUMA_MASK,
2556                                     eobs[context_ptr->txb_itr]);
2557 
2558                                 if (pcs_ptr->cdf_ctrl.update_coef) {
2559                                     ModeDecisionCandidateBuffer **candidate_buffer_ptr_array_base =
2560                                         context_ptr->md_context->candidate_buffer_ptr_array;
2561                                     ModeDecisionCandidateBuffer **candidate_buffer_ptr_array =
2562                                         &(candidate_buffer_ptr_array_base[0]);
2563                                     ModeDecisionCandidateBuffer *candidate_buffer;
2564 
2565                                     // Set the Candidate Buffer
2566                                     candidate_buffer = candidate_buffer_ptr_array[0];
2567                                     // Rate estimation function uses the values from CandidatePtr. The right values are copied from blk_ptr to CandidatePtr
2568                                     candidate_buffer->candidate_ptr->type =
2569                                         blk_ptr->prediction_mode_flag;
2570                                     candidate_buffer->candidate_ptr->pred_mode = blk_ptr->pred_mode;
2571                                     candidate_buffer->candidate_ptr->filter_intra_mode =
2572                                         blk_ptr->filter_intra_mode;
2573                                     const uint32_t coeff1d_offset = context_ptr->coded_area_sb;
2574 
2575                                     av1_txb_estimate_coeff_bits(
2576                                         context_ptr->md_context,
2577                                         1, //allow_update_cdf,
2578                                         &pcs_ptr->ec_ctx_array[sb_addr],
2579                                         pcs_ptr,
2580                                         candidate_buffer,
2581                                         coeff1d_offset,
2582                                         context_ptr->coded_area_sb_uv,
2583                                         coeff_buffer_sb,
2584                                         eobs[context_ptr->txb_itr][0],
2585                                         eobs[context_ptr->txb_itr][1],
2586                                         eobs[context_ptr->txb_itr][2],
2587                                         &y_txb_coeff_bits,
2588                                         &cb_txb_coeff_bits,
2589                                         &cr_txb_coeff_bits,
2590                                         context_ptr->blk_geom
2591                                             ->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
2592                                         context_ptr->blk_geom
2593                                             ->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
2594                                         blk_ptr->txb_array[context_ptr->txb_itr]
2595                                             .transform_type[PLANE_TYPE_Y],
2596                                         blk_ptr->txb_array[context_ptr->txb_itr]
2597                                             .transform_type[PLANE_TYPE_UV],
2598                                         context_ptr->blk_geom->has_uv && uv_pass ? COMPONENT_ALL :
2599                                                                                    COMPONENT_LUMA);
2600                                 }
2601                                 //intra mode
2602                                 av1_enc_gen_recon_func_ptr[is_16bit](
2603                                     context_ptr,
2604                                     txb_origin_x,
2605                                     txb_origin_y,
2606                                     recon_buffer,
2607                                     inverse_quant_buffer,
2608                                     context_ptr->blk_geom->has_uv && uv_pass ? PICTURE_BUFFER_DESC_FULL_MASK :
2609                                                                                PICTURE_BUFFER_DESC_LUMA_MASK,
2610                                     eobs[context_ptr->txb_itr]);
2611                             }
2612                             if (context_ptr->blk_geom->has_uv && uv_pass) {
2613                                 y_has_coeff |=
2614                                     context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].y_has_coeff[context_ptr->txb_itr];
2615                                 u_has_coeff |=
2616                                     context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].u_has_coeff[context_ptr->txb_itr];
2617                                 v_has_coeff |=
2618                                     context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].v_has_coeff[context_ptr->txb_itr];
2619                             }
2620                             else
2621                                 y_has_coeff |=
2622                                 context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].y_has_coeff[context_ptr->txb_itr];
2623 
2624                             context_ptr->coded_area_sb += blk_geom->tx_width[blk_ptr->tx_depth][tuIt] * blk_geom->tx_height[blk_ptr->tx_depth][tuIt];
2625 
2626                             if (context_ptr->blk_geom->has_uv && uv_pass)
2627                                 context_ptr->coded_area_sb_uv += blk_geom->tx_width_uv[blk_ptr->tx_depth][tuIt] * blk_geom->tx_height_uv[blk_ptr->tx_depth][tuIt];
2628 
2629                             // Update the luma Dc Sign Level Coeff Neighbor Array
2630                             {
2631                                     uint8_t dcSignLevelCoeff =
2632                                         (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[0][context_ptr->txb_itr];
2633                                 neighbor_array_unit_mode_write(
2634                                     pcs_ptr->ep_luma_dc_sign_level_coeff_neighbor_array[tile_idx],
2635                                     (uint8_t*)&dcSignLevelCoeff,
2636                                     txb_origin_x,
2637                                     txb_origin_y,
2638                                     context_ptr->blk_geom->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
2639                                     context_ptr->blk_geom->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr],
2640                                     NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
2641                             }
2642 
2643                             if (context_ptr->blk_geom->has_uv && uv_pass)
2644                             {
2645                                 // Update the cb Dc Sign Level Coeff Neighbor Array
2646                                 uint8_t dcSignLevelCoeff =
2647                                     (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[1][context_ptr->txb_itr];
2648                                 neighbor_array_unit_mode_write(
2649                                     pcs_ptr->ep_cb_dc_sign_level_coeff_neighbor_array[tile_idx],
2650                                     (uint8_t*)&dcSignLevelCoeff,
2651                                     ROUND_UV(txb_origin_x) >> 1,
2652                                     ROUND_UV(txb_origin_y) >> 1,
2653                                     context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
2654                                     context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
2655                                     NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
2656 
2657                                 // Update the cr DC Sign Level Coeff Neighbor Array
2658                                  dcSignLevelCoeff =
2659                                     (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[2][context_ptr->txb_itr];
2660                                 neighbor_array_unit_mode_write(
2661                                     pcs_ptr->ep_cr_dc_sign_level_coeff_neighbor_array[tile_idx],
2662                                     (uint8_t*)&dcSignLevelCoeff,
2663                                     ROUND_UV(txb_origin_x) >> 1,
2664                                     ROUND_UV(txb_origin_y) >> 1,
2665                                     context_ptr->blk_geom->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
2666                                     context_ptr->blk_geom->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
2667                                     NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
2668                             }
2669 
2670                         } // Transform Loop
2671                         // Calculate Root CBF
2672                         if (context_ptr->blk_geom->has_uv)
2673                             blk_ptr->block_has_coeff = (y_has_coeff | u_has_coeff | v_has_coeff) ? EB_TRUE : EB_FALSE;
2674                         else
2675                             blk_ptr->block_has_coeff = (y_has_coeff) ? EB_TRUE : EB_FALSE;
2676 
2677                         // Update the Intra-specific Neighbor Arrays
2678                         encode_pass_update_intra_mode_neighbor_arrays(
2679                             ep_mode_type_neighbor_array,
2680                             ep_intra_luma_mode_neighbor_array,
2681                             ep_intra_chroma_mode_neighbor_array,
2682                             (uint8_t)blk_ptr->pred_mode,
2683                             (uint8_t)pu_ptr->intra_chroma_mode,
2684                             context_ptr->blk_origin_x,
2685                             context_ptr->blk_origin_y,
2686                             context_ptr->blk_geom->bwidth,
2687                             context_ptr->blk_geom->bheight,
2688                             context_ptr->blk_geom->bwidth_uv,
2689                             context_ptr->blk_geom->bheight_uv,
2690                             blk_geom->has_uv ? PICTURE_BUFFER_DESC_FULL_MASK
2691                             : PICTURE_BUFFER_DESC_LUMA_MASK);
2692 
2693                         // Update Recon Samples-INTRA-
2694                         encode_pass_update_recon_sample_neighbour_arrays(
2695                             ep_luma_recon_neighbor_array,
2696                             ep_cb_recon_neighbor_array,
2697                             ep_cr_recon_neighbor_array,
2698                             recon_buffer,
2699                             context_ptr->blk_origin_x,
2700                             context_ptr->blk_origin_y,
2701                             context_ptr->blk_geom->bwidth,
2702                             context_ptr->blk_geom->bheight,
2703                             context_ptr->blk_geom->bwidth_uv,
2704                             context_ptr->blk_geom->bheight_uv,
2705                             context_ptr->blk_geom->has_uv ? PICTURE_BUFFER_DESC_FULL_MASK : PICTURE_BUFFER_DESC_LUMA_MASK,
2706                             is_16bit);
2707                     }
2708                 }
2709 
2710                 // Inter
2711                 else if (blk_ptr->prediction_mode_flag == INTER_MODE) {
2712                     uint8_t is_inter = context_ptr->is_inter = 1;
2713                     MvReferenceFrame rf[2];
2714                     av1_set_ref_frame(rf, (&blk_ptr->prediction_unit_array[0])->ref_frame_type);
2715                     int8_t ref_idx_l0 = get_ref_frame_idx(rf[0]);
2716                     int8_t ref_idx_l1 = rf[1] == NONE_FRAME ? get_ref_frame_idx(rf[0]) : get_ref_frame_idx(rf[1]);
2717                     uint8_t list_idx0, list_idx1;
2718                     list_idx0 = get_list_idx(rf[0]);
2719                     if (rf[1] == NONE_FRAME)
2720                         list_idx1 = get_list_idx(rf[0]);
2721                     else
2722                         list_idx1 = get_list_idx(rf[1]);
2723                     EbReferenceObject *ref_obj_0 =
2724                         ref_idx_l0 >= 0
2725                             ? (EbReferenceObject *)pcs_ptr->ref_pic_ptr_array[list_idx0][ref_idx_l0]
2726                                   ->object_ptr
2727                             : (EbReferenceObject *)NULL;
2728                     EbReferenceObject *ref_obj_1 =
2729                         ref_idx_l1 >= 0
2730                             ? (EbReferenceObject *)pcs_ptr->ref_pic_ptr_array[list_idx1][ref_idx_l1]
2731                                   ->object_ptr
2732                             : (EbReferenceObject *)NULL;
2733                     uint16_t txb_origin_x;
2734                     uint16_t txb_origin_y;
2735                     EbBool   is_blk_skip = EB_FALSE;
2736 
2737                     //********************************
2738                     //        INTER
2739                     //********************************
2740                     // Perform Merge/Skip Decision if the mode coming from MD is merge. for the First CU in Row merge will remain as is.
2741                     if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].skip_mode_allowed == EB_TRUE) {
2742                         is_blk_skip =
2743                             md_context_ptr->md_ep_pipe_sb[blk_ptr->mds_idx].skip_cost <=
2744                                     md_context_ptr->md_ep_pipe_sb[blk_ptr->mds_idx].merge_cost
2745                                 ? 1
2746                                 : 0;
2747                     }
2748                     //keep final usefull mvp for entropy
2749                     svt_memcpy(blk_ptr->av1xd->final_ref_mv_stack,
2750                            context_ptr->md_context
2751                                ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
2752                                .ed_ref_mv_stack[blk_ptr->prediction_unit_array[0].ref_frame_type],
2753                            sizeof(CandidateMv) * MAX_REF_MV_STACK_SIZE);
2754 
2755                         // Store drl_ctx in blk to avoid storing final_ref_mv_stack for EC
2756                         uint8_t      ref_frame_type_tmp = blk_ptr->prediction_unit_array[0].ref_frame_type;
2757                         if (blk_ptr->pred_mode == NEWMV || blk_ptr->pred_mode == NEW_NEWMV) {
2758                             int32_t idx;
2759                             for (idx = 0; idx < 2; ++idx) {
2760                                 if (blk_ptr->av1xd->ref_mv_count[ref_frame_type_tmp] > idx + 1)
2761                                     blk_ptr->drl_ctx[idx] = av1_drl_ctx(blk_ptr->av1xd->final_ref_mv_stack, idx);
2762                                 else
2763                                     blk_ptr->drl_ctx[idx] = -1;
2764                             }
2765                         }
2766 
2767                         if (have_nearmv_in_inter_mode(blk_ptr->pred_mode)) {
2768                             int32_t idx;
2769                             // TODO(jingning): Temporary solution to compensate the NEARESTMV offset.
2770                             for (idx = 1; idx < 3; ++idx) {
2771                                 if (blk_ptr->av1xd->ref_mv_count[ref_frame_type_tmp] > idx + 1)
2772                                     blk_ptr->drl_ctx_near[idx - 1] = av1_drl_ctx(blk_ptr->av1xd->final_ref_mv_stack, idx);
2773                                 else
2774                                     blk_ptr->drl_ctx_near[idx - 1] = -1;
2775                             }
2776                         }
2777 
2778                     {
2779                         // 1st Partition Loop
2780                         pu_ptr = blk_ptr->prediction_unit_array;
2781 
2782                         // Set MvUnit
2783                         context_ptr->mv_unit.pred_direction =
2784                             (uint8_t)pu_ptr->inter_pred_direction_index;
2785                         context_ptr->mv_unit.mv[REF_LIST_0].mv_union =
2786                             pu_ptr->mv[REF_LIST_0].mv_union;
2787                         context_ptr->mv_unit.mv[REF_LIST_1].mv_union =
2788                             pu_ptr->mv[REF_LIST_1].mv_union;
2789 
2790                         // Inter Prediction
2791                         if (pu_ptr->motion_mode == WARPED_CAUSAL) {
2792                             EbPictureBufferDesc             *ref_pic_list0;
2793                             EbPictureBufferDesc             *ref_pic_list1;
2794                             if (!is_16bit) {
2795                                 ref_pic_list0 = ref_idx_l0 >= 0
2796                                     ? ref_obj_0->reference_picture
2797                                     : (EbPictureBufferDesc *)NULL;
2798                                 ref_pic_list1 = ref_idx_l1 >= 0
2799                                     ? ref_obj_1->reference_picture
2800                                     : (EbPictureBufferDesc *)NULL;
2801                             }
2802                             else {
2803                                 ref_pic_list0 = ref_idx_l0 >= 0
2804                                     ? ref_obj_0->reference_picture16bit
2805                                     : (EbPictureBufferDesc *)NULL;
2806                                 ref_pic_list1 = ref_idx_l1 >= 0
2807                                     ? ref_obj_1->reference_picture16bit
2808                                     : (EbPictureBufferDesc *)NULL;
2809                             }
2810                                 warped_motion_prediction(
2811                                     pcs_ptr,
2812                                     &context_ptr->mv_unit,
2813                                     blk_ptr->prediction_unit_array[0].ref_frame_type,
2814                                     context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].compound_idx,
2815                                     &context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].interinter_comp,
2816                                     context_ptr->blk_origin_x,
2817                                     context_ptr->blk_origin_y,
2818                                     blk_ptr,
2819                                     blk_geom,
2820                                     ref_pic_list0,
2821                                     ref_pic_list1,
2822                                     recon_buffer,
2823                                     context_ptr->blk_origin_x,
2824                                     context_ptr->blk_origin_y,
2825                                     ep_luma_recon_neighbor_array,
2826                                     ep_cb_recon_neighbor_array,
2827                                     ep_cr_recon_neighbor_array,
2828                                     NULL,
2829 
2830                                     &context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].wm_params_l0,
2831                                     &context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].wm_params_l1,
2832                                     (uint8_t)scs_ptr->static_config.encoder_bit_depth,
2833                                     EB_TRUE,
2834                                     EB_TRUE);
2835                         }
2836 
2837                         if (pu_ptr->motion_mode != WARPED_CAUSAL) {
2838                             EbPictureBufferDesc *ref_pic_list0;
2839                             EbPictureBufferDesc *ref_pic_list1;
2840 
2841                             if (!is_16bit) {
2842                                 ref_pic_list0 = ref_idx_l0 >= 0
2843                                     ? ref_obj_0->reference_picture
2844                                     : (EbPictureBufferDesc *)NULL;
2845                                 ref_pic_list1 = ref_idx_l1 >= 0
2846                                     ? ref_obj_1->reference_picture
2847                                     : (EbPictureBufferDesc *)NULL;
2848                             } else {
2849                                 ref_pic_list0 = ref_idx_l0 >= 0
2850                                     ? ref_obj_0->reference_picture16bit
2851                                     : (EbPictureBufferDesc *)NULL;
2852                                 ref_pic_list1 = ref_idx_l1 >= 0
2853                                     ? ref_obj_1->reference_picture16bit
2854                                     : (EbPictureBufferDesc *)NULL;
2855                             }
2856 
2857 
2858                             if (is_16bit && !(scs_ptr->static_config.superres_mode > SUPERRES_NONE)) {
2859                                 av1_inter_prediction_16bit_pipeline(
2860                                     pcs_ptr,
2861                                     blk_ptr->interp_filters,
2862                                     blk_ptr,
2863                                     blk_ptr->prediction_unit_array->ref_frame_type,
2864                                     &context_ptr->mv_unit,
2865                                     0, //use_intrabc,
2866                                     blk_ptr->prediction_unit_array->motion_mode,
2867                                     0, //use_precomputed_obmc,
2868                                     0,
2869                                     blk_ptr->compound_idx,
2870                                     &blk_ptr->interinter_comp,
2871                                     ep_luma_recon_neighbor_array,
2872                                     ep_cb_recon_neighbor_array,
2873                                     ep_cr_recon_neighbor_array,
2874                                     blk_ptr->is_interintra_used,
2875                                     blk_ptr->interintra_mode,
2876                                     blk_ptr->use_wedge_interintra,
2877                                     blk_ptr->interintra_wedge_index,
2878                                     context_ptr->blk_origin_x,
2879                                     context_ptr->blk_origin_y,
2880                                     blk_geom->bwidth,
2881                                     blk_geom->bheight,
2882                                     ref_pic_list0,
2883                                     ref_pic_list1,
2884                                     recon_buffer,
2885                                     context_ptr->blk_origin_x,
2886                                     context_ptr->blk_origin_y,
2887                                     EB_TRUE,
2888                                     (uint8_t)scs_ptr->static_config.encoder_bit_depth);
2889                             } else {
2890                                 av1_inter_prediction(
2891                                     scs_ptr,
2892                                     pcs_ptr,
2893                                     blk_ptr->interp_filters,
2894                                     blk_ptr,
2895                                     blk_ptr->prediction_unit_array->ref_frame_type,
2896                                     &context_ptr->mv_unit,
2897                                     0, //use_intrabc,
2898                                     blk_ptr->prediction_unit_array->motion_mode,
2899                                     0, //use_precomputed_obmc,
2900                                     0,
2901                                     blk_ptr->compound_idx,
2902                                     &blk_ptr->interinter_comp,
2903                                     ep_luma_recon_neighbor_array,
2904                                     ep_cb_recon_neighbor_array,
2905                                     ep_cr_recon_neighbor_array,
2906                                     blk_ptr->is_interintra_used,
2907                                     blk_ptr->interintra_mode,
2908                                     blk_ptr->use_wedge_interintra,
2909                                     blk_ptr->interintra_wedge_index,
2910 
2911                                     context_ptr->blk_origin_x,
2912                                     context_ptr->blk_origin_y,
2913                                     blk_geom->bwidth,
2914                                     blk_geom->bheight,
2915                                     ref_pic_list0,
2916                                     ref_pic_list1,
2917                                     recon_buffer,
2918                                     context_ptr->blk_origin_x,
2919                                     context_ptr->blk_origin_y,
2920                                     EB_TRUE,
2921                                     (uint8_t)scs_ptr->static_config.encoder_bit_depth);
2922                             }
2923                         }
2924                     }
2925                     context_ptr->txb_itr = 0;
2926                     // Transform Loop
2927                     context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].y_has_coeff[0] = EB_FALSE;
2928                     context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].u_has_coeff[0] = EB_FALSE;
2929                     context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].v_has_coeff[0] = EB_FALSE;
2930 
2931                     uint16_t tot_tu         = context_ptr->blk_geom->txb_count[blk_ptr->tx_depth];
2932                     if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].skip_mode_allowed == EB_FALSE) {
2933                         for (uint16_t tu_it = 0; tu_it < tot_tu; tu_it++) {
2934                             context_ptr->txb_itr = (uint8_t)tu_it;
2935                             uint8_t uv_pass =
2936                                 blk_ptr->tx_depth && tu_it ? 0 : 1; //NM: 128x128 exeption
2937                             txb_origin_x =
2938                                 context_ptr->blk_origin_x +
2939                                 context_ptr->blk_geom->tx_org_x[is_inter][blk_ptr->tx_depth][tu_it] -
2940                                 context_ptr->blk_geom->origin_x;
2941                             txb_origin_y =
2942                                 context_ptr->blk_origin_y +
2943                                 context_ptr->blk_geom->tx_org_y[is_inter][blk_ptr->tx_depth][tu_it] -
2944                                 context_ptr->blk_geom->origin_y;
2945 
2946                             context_ptr->md_context->luma_txb_skip_context = 0;
2947                             context_ptr->md_context->luma_dc_sign_context  = 0;
2948                             get_txb_ctx(pcs_ptr,
2949                                         COMPONENT_LUMA,
2950                                         pcs_ptr->ep_luma_dc_sign_level_coeff_neighbor_array[tile_idx],
2951                                         txb_origin_x,
2952                                         txb_origin_y,
2953                                         context_ptr->blk_geom->bsize,
2954                                         context_ptr->blk_geom
2955                                             ->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
2956                                         &context_ptr->md_context->luma_txb_skip_context,
2957                                         &context_ptr->md_context->luma_dc_sign_context);
2958 
2959                             if (context_ptr->blk_geom->has_uv && uv_pass) {
2960                                 context_ptr->md_context->cb_txb_skip_context = 0;
2961                                 context_ptr->md_context->cb_dc_sign_context  = 0;
2962                                 get_txb_ctx(
2963                                     pcs_ptr,
2964                                     COMPONENT_CHROMA,
2965                                     pcs_ptr->ep_cb_dc_sign_level_coeff_neighbor_array[tile_idx],
2966                                     ROUND_UV(txb_origin_x) >> 1,
2967                                     ROUND_UV(txb_origin_y) >> 1,
2968                                     context_ptr->blk_geom->bsize_uv,
2969                                     context_ptr->blk_geom->txsize_uv[context_ptr->blk_ptr->tx_depth]
2970                                                                     [context_ptr->txb_itr],
2971                                     &context_ptr->md_context->cb_txb_skip_context,
2972                                     &context_ptr->md_context->cb_dc_sign_context);
2973 
2974                                 context_ptr->md_context->cr_txb_skip_context = 0;
2975                                 context_ptr->md_context->cr_dc_sign_context  = 0;
2976                                 get_txb_ctx(pcs_ptr,
2977                                             COMPONENT_CHROMA,
2978                                             pcs_ptr->ep_cr_dc_sign_level_coeff_neighbor_array[tile_idx],
2979                                             ROUND_UV(txb_origin_x) >> 1,
2980                                             ROUND_UV(txb_origin_y) >> 1,
2981                                             context_ptr->blk_geom->bsize_uv,
2982                                             context_ptr->blk_geom->txsize_uv[blk_ptr->tx_depth]
2983                                                                             [context_ptr->txb_itr],
2984                                             &context_ptr->md_context->cr_txb_skip_context,
2985                                             &context_ptr->md_context->cr_dc_sign_context);
2986                             }
2987 
2988                             //inter mode  1
2989                             av1_encode_loop_func_table[is_16bit](
2990                                 pcs_ptr,
2991                                 context_ptr,
2992                                 sb_ptr,
2993                                 txb_origin_x, //pic org
2994                                 txb_origin_y,
2995                                 recon_buffer,
2996                                 coeff_buffer_sb,
2997                                 residual_buffer,
2998                                 transform_buffer,
2999                                 inverse_quant_buffer,
3000                                 count_non_zero_coeffs,
3001                                 context_ptr->blk_geom->has_uv && uv_pass
3002                                     ? PICTURE_BUFFER_DESC_FULL_MASK
3003                                     : PICTURE_BUFFER_DESC_LUMA_MASK,
3004                                 eobs[context_ptr->txb_itr]);
3005                                 context_ptr->md_context
3006                                     ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3007                                     .y_has_coeff[context_ptr->txb_itr] =
3008                                     count_non_zero_coeffs[0] != 0 ? EB_TRUE : EB_FALSE;
3009                                 context_ptr->md_context
3010                                     ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3011                                     .u_has_coeff[context_ptr->txb_itr] =
3012                                     count_non_zero_coeffs[1] != 0 ? EB_TRUE : EB_FALSE;
3013                                 context_ptr->md_context
3014                                     ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3015                                     .v_has_coeff[context_ptr->txb_itr] =
3016                                     count_non_zero_coeffs[2] != 0 ? EB_TRUE : EB_FALSE;
3017                                 // Update count_non_zero_coeffs after CBF decision
3018                                 if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].y_has_coeff[context_ptr->txb_itr] ==
3019                                     EB_FALSE)
3020                                     count_non_zero_coeffs[0] = 0;
3021                                 if (context_ptr->blk_geom->has_uv && uv_pass) {
3022                                     if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].u_has_coeff[context_ptr->txb_itr] == EB_FALSE)
3023                                         count_non_zero_coeffs[1] = 0;
3024                                     if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].v_has_coeff[context_ptr->txb_itr] == EB_FALSE)
3025                                         count_non_zero_coeffs[2] = 0;
3026                                 }
3027 
3028                                 // Update TU count_non_zero_coeffs
3029                                 blk_ptr->txb_array[context_ptr->txb_itr].nz_coef_count[0] =
3030                                     (uint16_t)count_non_zero_coeffs[0];
3031                                 blk_ptr->txb_array[context_ptr->txb_itr].nz_coef_count[1] =
3032                                     (uint16_t)count_non_zero_coeffs[1];
3033                                 blk_ptr->txb_array[context_ptr->txb_itr].nz_coef_count[2] =
3034                                     (uint16_t)count_non_zero_coeffs[2];
3035                                 if (pcs_ptr->cdf_ctrl.update_coef) {
3036                                     ModeDecisionCandidateBuffer **candidate_buffer_ptr_array_base =
3037                                         context_ptr->md_context->candidate_buffer_ptr_array;
3038                                     ModeDecisionCandidateBuffer **candidate_buffer_ptr_array =
3039                                         &(candidate_buffer_ptr_array_base[0]);
3040                                     ModeDecisionCandidateBuffer *candidate_buffer;
3041 
3042                                     // Set the Candidate Buffer
3043                                     candidate_buffer = candidate_buffer_ptr_array[0];
3044                                     // Rate estimation function uses the values from CandidatePtr. The right values are copied from blk_ptr to CandidatePtr
3045                                     candidate_buffer->candidate_ptr->type =
3046                                         blk_ptr->prediction_mode_flag;
3047                                     candidate_buffer->candidate_ptr->pred_mode = blk_ptr->pred_mode;
3048                                     candidate_buffer->candidate_ptr->filter_intra_mode =
3049                                         blk_ptr->filter_intra_mode;
3050                                     const uint32_t coeff1d_offset = context_ptr->coded_area_sb;
3051 
3052                                     //CHKN add updating eobs[] after CBF decision
3053                                     if (context_ptr->md_context
3054                                             ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3055                                             .y_has_coeff[context_ptr->txb_itr] == EB_FALSE)
3056                                         eobs[context_ptr->txb_itr][0] = 0;
3057                                     if (context_ptr->blk_geom->has_uv && uv_pass) {
3058                                         if (context_ptr->md_context
3059                                                 ->md_local_blk_unit[context_ptr->blk_geom
3060                                                                         ->blkidx_mds]
3061                                                 .u_has_coeff[context_ptr->txb_itr] == EB_FALSE)
3062                                             eobs[context_ptr->txb_itr][1] = 0;
3063                                         if (context_ptr->md_context
3064                                                 ->md_local_blk_unit[context_ptr->blk_geom
3065                                                                         ->blkidx_mds]
3066                                                 .v_has_coeff[context_ptr->txb_itr] == EB_FALSE)
3067                                             eobs[context_ptr->txb_itr][2] = 0;
3068                                     }
3069 
3070                                     av1_txb_estimate_coeff_bits(
3071                                         context_ptr->md_context,
3072                                         1, //allow_update_cdf,
3073                                         &pcs_ptr->ec_ctx_array[sb_addr],
3074                                         pcs_ptr,
3075                                         candidate_buffer,
3076                                         coeff1d_offset,
3077                                         context_ptr->coded_area_sb_uv,
3078                                         coeff_buffer_sb,
3079                                         eobs[context_ptr->txb_itr][0],
3080                                         eobs[context_ptr->txb_itr][1],
3081                                         eobs[context_ptr->txb_itr][2],
3082                                         &y_txb_coeff_bits,
3083                                         &cb_txb_coeff_bits,
3084                                         &cr_txb_coeff_bits,
3085                                         context_ptr->blk_geom
3086                                             ->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
3087                                         context_ptr->blk_geom
3088                                             ->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3089                                         blk_ptr->txb_array[context_ptr->txb_itr]
3090                                             .transform_type[PLANE_TYPE_Y],
3091                                         blk_ptr->txb_array[context_ptr->txb_itr]
3092                                             .transform_type[PLANE_TYPE_UV],
3093                                         context_ptr->blk_geom->has_uv && uv_pass ? COMPONENT_ALL
3094                                                                                  : COMPONENT_LUMA);
3095                                 }
3096                             context_ptr->coded_area_sb +=
3097                                 blk_geom->tx_width[blk_ptr->tx_depth][tu_it] *
3098                                 blk_geom->tx_height[blk_ptr->tx_depth][tu_it];
3099                             if (context_ptr->blk_geom->has_uv && uv_pass)
3100                                 context_ptr->coded_area_sb_uv +=
3101                                     blk_geom->tx_width_uv[blk_ptr->tx_depth][tu_it] *
3102                                     blk_geom->tx_height_uv[blk_ptr->tx_depth][tu_it];
3103 
3104                             // Update the luma Dc Sign Level Coeff Neighbor Array
3105                             {
3106                                 uint8_t dc_sign_level_coeff =
3107                                     (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[0][context_ptr->txb_itr];
3108                                 neighbor_array_unit_mode_write(
3109                                     pcs_ptr->ep_luma_dc_sign_level_coeff_neighbor_array[tile_idx],
3110                                     (uint8_t *)&dc_sign_level_coeff,
3111                                     txb_origin_x,
3112                                     txb_origin_y,
3113                                     context_ptr->blk_geom
3114                                         ->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
3115                                     context_ptr->blk_geom
3116                                         ->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr],
3117                                     NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
3118                             }
3119 
3120                             if (context_ptr->blk_geom->has_uv && uv_pass) {
3121                                 // Update the cb Dc Sign Level Coeff Neighbor Array
3122                                 uint8_t dc_sign_level_coeff =
3123                                     (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[1][context_ptr->txb_itr];
3124                                 neighbor_array_unit_mode_write(
3125                                     pcs_ptr->ep_cb_dc_sign_level_coeff_neighbor_array[tile_idx],
3126                                     (uint8_t *)&dc_sign_level_coeff,
3127                                     ROUND_UV(txb_origin_x) >> 1,
3128                                     ROUND_UV(txb_origin_y) >> 1,
3129                                     context_ptr->blk_geom
3130                                         ->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3131                                     context_ptr->blk_geom
3132                                         ->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3133                                     NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
3134 
3135                                 // Update the cr DC Sign Level Coeff Neighbor Array
3136                                  dc_sign_level_coeff =
3137                                     (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[2][context_ptr->txb_itr];
3138                                 neighbor_array_unit_mode_write(
3139                                     pcs_ptr->ep_cr_dc_sign_level_coeff_neighbor_array[tile_idx],
3140                                     (uint8_t *)&dc_sign_level_coeff,
3141                                     ROUND_UV(txb_origin_x) >> 1,
3142                                     ROUND_UV(txb_origin_y) >> 1,
3143                                     context_ptr->blk_geom
3144                                         ->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3145                                     context_ptr->blk_geom
3146                                         ->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3147                                     NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
3148                             }
3149 
3150                         } // Transform Loop
3151                     }
3152 
3153                     //Set Final CU data flags after skip/Merge decision.
3154                     if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].skip_mode_allowed == EB_TRUE) {
3155                         blk_ptr->skip_flag = (is_blk_skip) ? EB_TRUE : EB_FALSE;
3156                         context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].skip_mode_allowed =
3157                             (is_blk_skip) ? EB_FALSE : EB_TRUE;
3158                     }
3159 
3160                     // Initialize the Transform Loop
3161 
3162                     context_ptr->txb_itr = 0;
3163                     y_has_coeff          = 0;
3164                     u_has_coeff          = 0;
3165                     v_has_coeff          = 0;
3166                     tot_tu               = context_ptr->blk_geom->txb_count[blk_ptr->tx_depth];
3167 
3168                     //reset coeff buffer offsets at the start of a new Tx loop
3169                     context_ptr->coded_area_sb    = coded_area_org;
3170                     context_ptr->coded_area_sb_uv = coded_area_org_uv;
3171                     for (uint16_t tu_it = 0; tu_it < tot_tu; tu_it++) {
3172                         uint8_t uv_pass = blk_ptr->tx_depth && tu_it ? 0 : 1; //NM: 128x128 exeption
3173                         context_ptr->txb_itr = (uint8_t)tu_it;
3174                         txb_origin_x = context_ptr->blk_origin_x +
3175                                        (context_ptr->blk_geom
3176                                             ->tx_org_x[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
3177                                         context_ptr->blk_geom->origin_x);
3178                         txb_origin_y = context_ptr->blk_origin_y +
3179                                        (context_ptr->blk_geom
3180                                             ->tx_org_y[is_inter][blk_ptr->tx_depth][context_ptr->txb_itr] -
3181                                         context_ptr->blk_geom->origin_y);
3182                         context_ptr->md_context->luma_txb_skip_context = 0;
3183                         context_ptr->md_context->luma_dc_sign_context  = 0;
3184                         get_txb_ctx(
3185                             pcs_ptr,
3186                             COMPONENT_LUMA,
3187                             pcs_ptr->ep_luma_dc_sign_level_coeff_neighbor_array[tile_idx],
3188                             txb_origin_x,
3189                             txb_origin_y,
3190                             context_ptr->blk_geom->bsize,
3191                             context_ptr->blk_geom->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
3192                             &context_ptr->md_context->luma_txb_skip_context,
3193                             &context_ptr->md_context->luma_dc_sign_context);
3194 
3195                         if (context_ptr->blk_geom->has_uv && uv_pass) {
3196                             context_ptr->md_context->cb_txb_skip_context = 0;
3197                             context_ptr->md_context->cb_dc_sign_context  = 0;
3198                             get_txb_ctx(
3199                                 pcs_ptr,
3200                                 COMPONENT_CHROMA,
3201                                 pcs_ptr->ep_cb_dc_sign_level_coeff_neighbor_array[tile_idx],
3202                                 ROUND_UV(txb_origin_x) >> 1,
3203                                 ROUND_UV(txb_origin_y) >> 1,
3204                                 context_ptr->blk_geom->bsize_uv,
3205                                 context_ptr->blk_geom->txsize_uv[context_ptr->blk_ptr->tx_depth]
3206                                                                 [context_ptr->txb_itr],
3207                                 &context_ptr->md_context->cb_txb_skip_context,
3208                                 &context_ptr->md_context->cb_dc_sign_context);
3209 
3210                             context_ptr->md_context->cr_txb_skip_context = 0;
3211                             context_ptr->md_context->cr_dc_sign_context  = 0;
3212                             get_txb_ctx(pcs_ptr,
3213                                         COMPONENT_CHROMA,
3214                                         pcs_ptr->ep_cr_dc_sign_level_coeff_neighbor_array[tile_idx],
3215                                         ROUND_UV(txb_origin_x) >> 1,
3216                                         ROUND_UV(txb_origin_y) >> 1,
3217                                         context_ptr->blk_geom->bsize_uv,
3218                                         context_ptr->blk_geom
3219                                             ->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3220                                         &context_ptr->md_context->cr_txb_skip_context,
3221                                         &context_ptr->md_context->cr_dc_sign_context);
3222                         }
3223                         if (blk_ptr->skip_flag == EB_TRUE) {
3224                             context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].y_has_coeff[context_ptr->txb_itr] = EB_FALSE;
3225                             context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].u_has_coeff[context_ptr->txb_itr] = EB_FALSE;
3226                             context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].v_has_coeff[context_ptr->txb_itr] = EB_FALSE;
3227 
3228                             context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[0][context_ptr->txb_itr] = 0;
3229                             context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[1][context_ptr->txb_itr] = 0;
3230                             context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[2][context_ptr->txb_itr] = 0;
3231                         } else if (context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].skip_mode_allowed == EB_TRUE) {
3232 
3233                             //inter mode  2
3234 
3235                             av1_encode_loop_func_table[is_16bit](
3236                                 pcs_ptr,
3237                                 context_ptr,
3238                                 sb_ptr,
3239                                 txb_origin_x, //pic offset
3240                                 txb_origin_y,
3241                                 recon_buffer,
3242                                 coeff_buffer_sb,
3243                                 residual_buffer,
3244                                 transform_buffer,
3245                                 inverse_quant_buffer,
3246                                 count_non_zero_coeffs,
3247                                 context_ptr->blk_geom->has_uv && uv_pass
3248                                     ? PICTURE_BUFFER_DESC_FULL_MASK
3249                                     : PICTURE_BUFFER_DESC_LUMA_MASK,
3250                                 eobs[context_ptr->txb_itr]);
3251 
3252 
3253                             if (pcs_ptr->cdf_ctrl.update_coef) {
3254                                 ModeDecisionCandidateBuffer **candidate_buffer_ptr_array_base =
3255                                     context_ptr->md_context->candidate_buffer_ptr_array;
3256                                 ModeDecisionCandidateBuffer **candidate_buffer_ptr_array =
3257                                     &(candidate_buffer_ptr_array_base[0]);
3258                                 ModeDecisionCandidateBuffer *candidate_buffer;
3259 
3260                                 // Set the Candidate Buffer
3261                                 candidate_buffer = candidate_buffer_ptr_array[0];
3262                                 // Rate estimation function uses the values from CandidatePtr. The right values are copied from blk_ptr to CandidatePtr
3263                                 candidate_buffer->candidate_ptr->type =
3264                                     blk_ptr->prediction_mode_flag;
3265                                 candidate_buffer->candidate_ptr->pred_mode = blk_ptr->pred_mode;
3266                                 candidate_buffer->candidate_ptr->filter_intra_mode =
3267                                     blk_ptr->filter_intra_mode;
3268                                 const uint32_t coeff1d_offset = context_ptr->coded_area_sb;
3269 
3270                                 av1_txb_estimate_coeff_bits(
3271                                     context_ptr->md_context,
3272                                     1, //allow_update_cdf,
3273                                     &pcs_ptr->ec_ctx_array[sb_addr],
3274                                     pcs_ptr,
3275                                     candidate_buffer,
3276                                     coeff1d_offset,
3277                                     context_ptr->coded_area_sb_uv,
3278                                     coeff_buffer_sb,
3279                                     eobs[context_ptr->txb_itr][0],
3280                                     eobs[context_ptr->txb_itr][1],
3281                                     eobs[context_ptr->txb_itr][2],
3282                                     &y_txb_coeff_bits,
3283                                     &cb_txb_coeff_bits,
3284                                     &cr_txb_coeff_bits,
3285                                     context_ptr->blk_geom
3286                                         ->txsize[blk_ptr->tx_depth][context_ptr->txb_itr],
3287                                     context_ptr->blk_geom
3288                                         ->txsize_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3289                                     blk_ptr->txb_array[context_ptr->txb_itr]
3290                                         .transform_type[PLANE_TYPE_Y],
3291                                     blk_ptr->txb_array[context_ptr->txb_itr]
3292                                         .transform_type[PLANE_TYPE_UV],
3293                                     context_ptr->blk_geom->has_uv && uv_pass ? COMPONENT_ALL
3294                                                                              : COMPONENT_LUMA);
3295                             }
3296                         }
3297                         if (context_ptr->blk_geom->has_uv && uv_pass) {
3298                             blk_ptr->block_has_coeff =
3299                                 blk_ptr->block_has_coeff |
3300                                 context_ptr->md_context
3301                                     ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3302                                     .y_has_coeff[context_ptr->txb_itr] |
3303                                 context_ptr->md_context
3304                                     ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3305                                     .u_has_coeff[context_ptr->txb_itr] |
3306                                 context_ptr->md_context
3307                                     ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3308                                     .v_has_coeff[context_ptr->txb_itr];
3309                         } else {
3310                             blk_ptr->block_has_coeff =
3311                                 blk_ptr->block_has_coeff |
3312                                 context_ptr->md_context
3313                                     ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3314                                     .y_has_coeff[context_ptr->txb_itr];
3315                         }
3316 
3317                         //inter mode
3318                         av1_enc_gen_recon_func_ptr[is_16bit](
3319                             context_ptr,
3320                             txb_origin_x, //pic offset
3321                             txb_origin_y,
3322                             recon_buffer,
3323                             inverse_quant_buffer,
3324                             context_ptr->blk_geom->has_uv && uv_pass
3325                                 ? PICTURE_BUFFER_DESC_FULL_MASK
3326                                 : PICTURE_BUFFER_DESC_LUMA_MASK,
3327                             eobs[context_ptr->txb_itr]);
3328 
3329                         if (context_ptr->blk_geom->has_uv && uv_pass) {
3330                             y_has_coeff |=
3331                                 context_ptr->md_context
3332                                     ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3333                                     .y_has_coeff[context_ptr->txb_itr];
3334                             u_has_coeff |=
3335                                 context_ptr->md_context
3336                                     ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3337                                     .u_has_coeff[context_ptr->txb_itr];
3338                             v_has_coeff |=
3339                                 context_ptr->md_context
3340                                     ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3341                                     .v_has_coeff[context_ptr->txb_itr];
3342                         } else
3343                             y_has_coeff |=
3344                                 context_ptr->md_context
3345                                     ->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds]
3346                                     .y_has_coeff[context_ptr->txb_itr];
3347 
3348                         context_ptr->coded_area_sb += blk_geom->tx_width[blk_ptr->tx_depth][tu_it] *
3349                                                       blk_geom->tx_height[blk_ptr->tx_depth][tu_it];
3350 
3351                         if (context_ptr->blk_geom->has_uv && uv_pass)
3352                             context_ptr->coded_area_sb_uv +=
3353                                 blk_geom->tx_width_uv[blk_ptr->tx_depth][tu_it] *
3354                                 blk_geom->tx_height_uv[blk_ptr->tx_depth][tu_it];
3355 
3356                         // Update the luma Dc Sign Level Coeff Neighbor Array
3357                         {
3358                             uint8_t dc_sign_level_coeff =
3359                                 (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[0][context_ptr->txb_itr];
3360                             neighbor_array_unit_mode_write(
3361                                 pcs_ptr->ep_luma_dc_sign_level_coeff_neighbor_array[tile_idx],
3362                                 (uint8_t *)&dc_sign_level_coeff,
3363                                 txb_origin_x,
3364                                 txb_origin_y,
3365                                 context_ptr->blk_geom
3366                                     ->tx_width[blk_ptr->tx_depth][context_ptr->txb_itr],
3367                                 context_ptr->blk_geom
3368                                     ->tx_height[blk_ptr->tx_depth][context_ptr->txb_itr],
3369                                 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
3370                         }
3371 
3372                         // Update the cb Dc Sign Level Coeff Neighbor Array
3373                         if (context_ptr->blk_geom->has_uv && uv_pass) {
3374                             uint8_t dc_sign_level_coeff =
3375                                 (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[1][context_ptr->txb_itr];
3376                             neighbor_array_unit_mode_write(
3377                                 pcs_ptr->ep_cb_dc_sign_level_coeff_neighbor_array[tile_idx],
3378                                 (uint8_t *)&dc_sign_level_coeff,
3379                                 ROUND_UV(txb_origin_x) >> 1,
3380                                 ROUND_UV(txb_origin_y) >> 1,
3381                                 context_ptr->blk_geom
3382                                     ->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3383                                 context_ptr->blk_geom
3384                                     ->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3385                                 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
3386                             // Update the cr DC Sign Level Coeff Neighbor Array
3387                             dc_sign_level_coeff =
3388                                 (uint8_t)context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].quantized_dc[2][context_ptr->txb_itr];
3389                             neighbor_array_unit_mode_write(
3390                                 pcs_ptr->ep_cr_dc_sign_level_coeff_neighbor_array[tile_idx],
3391                                 (uint8_t *)&dc_sign_level_coeff,
3392                                 ROUND_UV(txb_origin_x) >> 1,
3393                                 ROUND_UV(txb_origin_y) >> 1,
3394                                 context_ptr->blk_geom
3395                                     ->tx_width_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3396                                 context_ptr->blk_geom
3397                                     ->tx_height_uv[blk_ptr->tx_depth][context_ptr->txb_itr],
3398                                 NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
3399                         }
3400 
3401                     } // Transform Loop
3402 
3403                     // Calculate Root CBF
3404                     if (context_ptr->blk_geom->has_uv)
3405                         blk_ptr->block_has_coeff =
3406                             (y_has_coeff | u_has_coeff | v_has_coeff) ? EB_TRUE : EB_FALSE;
3407                     else
3408                         blk_ptr->block_has_coeff = (y_has_coeff) ? EB_TRUE : EB_FALSE;
3409 
3410                     // Force skip when skip mode is allowed and the block has no coded coefficients (block_has_coeff == 0)
3411 
3412                     if (blk_ptr->skip_flag == EB_FALSE &&
3413                         context_ptr->md_context->md_local_blk_unit[context_ptr->blk_geom->blkidx_mds].skip_mode_allowed == EB_TRUE &&
3414                         blk_ptr->block_has_coeff == EB_FALSE) {
3415                         blk_ptr->skip_flag = EB_TRUE;
3416                     }
3417 
3418                     {
3419                         // Set the PU Loop Variables
3420                         pu_ptr = blk_ptr->prediction_unit_array;
3421 
3422                         // Set MvUnit
3423                         context_ptr->mv_unit.pred_direction =
3424                             (uint8_t)pu_ptr->inter_pred_direction_index;
3425                         context_ptr->mv_unit.mv[REF_LIST_0].mv_union =
3426                             pu_ptr->mv[REF_LIST_0].mv_union;
3427                         context_ptr->mv_unit.mv[REF_LIST_1].mv_union =
3428                             pu_ptr->mv[REF_LIST_1].mv_union;
3429 
3430                         // Update Neighbor Arrays (Mode Type, mvs, SKIP)
3431                         {
3432                             uint8_t skip_flag = (uint8_t)blk_ptr->skip_flag;
3433                             encode_pass_update_inter_mode_neighbor_arrays(
3434                                 ep_mode_type_neighbor_array,
3435                                 ep_mv_neighbor_array,
3436                                 ep_skip_flag_neighbor_array,
3437                                 &context_ptr->mv_unit,
3438                                 &skip_flag,
3439                                 context_ptr->blk_origin_x,
3440                                 context_ptr->blk_origin_y,
3441                                 blk_geom->bwidth,
3442                                 blk_geom->bheight);
3443                         }
3444                     } // 2nd Partition Loop
3445 
3446                     // Update Recon Samples Neighbor Arrays -INTER-
3447                     encode_pass_update_recon_sample_neighbour_arrays(
3448                         ep_luma_recon_neighbor_array,
3449                         ep_cb_recon_neighbor_array,
3450                         ep_cr_recon_neighbor_array,
3451                         recon_buffer,
3452                         context_ptr->blk_origin_x,
3453                         context_ptr->blk_origin_y,
3454                         context_ptr->blk_geom->bwidth,
3455                         context_ptr->blk_geom->bheight,
3456                         context_ptr->blk_geom->bwidth_uv,
3457                         context_ptr->blk_geom->bheight_uv,
3458                         context_ptr->blk_geom->has_uv ? PICTURE_BUFFER_DESC_FULL_MASK
3459                                                         : PICTURE_BUFFER_DESC_LUMA_MASK,
3460                         is_16bit);
3461 
3462                 } else {
3463                     CHECK_REPORT_ERROR_NC(encode_context_ptr->app_callback_ptr, EB_ENC_CL_ERROR2);
3464                 }
3465                 if (pcs_ptr->parent_pcs_ptr->frm_hdr.allow_intrabc && is_16bit && (context_ptr->bit_depth == EB_8BIT)) {
3466                     EbPictureBufferDesc *recon_buffer_16bit;
3467                     EbPictureBufferDesc *recon_buffer_8bit;
3468                     if (pcs_ptr->parent_pcs_ptr->is_used_as_reference_flag == EB_TRUE)
3469                         // reference pictures: use the 16bit picture buffer of the reference object
3470                         recon_buffer_16bit = ((EbReferenceObject *)pcs_ptr->parent_pcs_ptr
3471                             ->reference_picture_wrapper_ptr->object_ptr)
3472                         ->reference_picture16bit;
3473                     else // non ref pictures
3474                         recon_buffer_16bit = pcs_ptr->parent_pcs_ptr->enc_dec_ptr->recon_picture16bit_ptr;
3475 
3476                     if (pcs_ptr->parent_pcs_ptr->is_used_as_reference_flag == EB_TRUE)
3477                         // reference pictures: use the 8bit picture buffer of the reference object
3478                         recon_buffer_8bit = ((EbReferenceObject *)pcs_ptr->parent_pcs_ptr
3479                             ->reference_picture_wrapper_ptr->object_ptr)
3480                         ->reference_picture;
3481                     else // non ref pictures
3482                         recon_buffer_8bit = pcs_ptr->parent_pcs_ptr->enc_dec_ptr->recon_picture_ptr;
3483 
3484                     uint32_t pred_buf_x_offest = context_ptr->blk_origin_x;
3485                     uint32_t pred_buf_y_offest = context_ptr->blk_origin_y;
3486 
3487                     uint16_t *dst_16bit = (uint16_t *)(recon_buffer_16bit->buffer_y) +
3488                         pred_buf_x_offest + recon_buffer_16bit->origin_x +
3489                         (pred_buf_y_offest + recon_buffer_16bit->origin_y) *
3490                         recon_buffer_16bit->stride_y;
3491                     int32_t dst_stride_16bit = recon_buffer_16bit->stride_y;
3492 
3493                     uint8_t *dst;
3494                     int32_t  dst_stride;
3495 
3496                     dst = recon_buffer_8bit->buffer_y + pred_buf_x_offest + recon_buffer_8bit->origin_x +
3497                         (pred_buf_y_offest + recon_buffer_8bit->origin_y) * recon_buffer_8bit->stride_y;
3498                     dst_stride = recon_buffer_8bit->stride_y;
3499 
3500                     svt_convert_16bit_to_8bit(dst_16bit,
3501                         dst_stride_16bit,
3502                         dst,
3503                         dst_stride,
3504                         context_ptr->blk_geom->bwidth,
3505                         context_ptr->blk_geom->bheight);
3506 
3507                     //copy recon from 16bit to 8bit
3508                     pred_buf_x_offest = ((context_ptr->blk_origin_x >> 3) << 3) >> 1;
3509                     pred_buf_y_offest = ((context_ptr->blk_origin_y >> 3) << 3) >> 1;
3510 
3511                     dst_16bit = (uint16_t *)(recon_buffer_16bit->buffer_cb) +
3512                         pred_buf_x_offest + recon_buffer_16bit->origin_x / 2 +
3513                         (pred_buf_y_offest + recon_buffer_16bit->origin_y / 2) *
3514                         recon_buffer_16bit->stride_cb;
3515                     dst_stride_16bit = recon_buffer_16bit->stride_cb;
3516 
3517                     dst = recon_buffer_8bit->buffer_cb + pred_buf_x_offest +
3518                         recon_buffer_8bit->origin_x / 2 +
3519                         (pred_buf_y_offest + recon_buffer_8bit->origin_y / 2) *
3520                         recon_buffer_8bit->stride_cb;
3521                     dst_stride = recon_buffer_8bit->stride_cb;
3522 
3523 
3524                     svt_convert_16bit_to_8bit(dst_16bit,
3525                         dst_stride_16bit,
3526                         dst,
3527                         dst_stride,
3528                         context_ptr->blk_geom->bwidth_uv,
3529                         context_ptr->blk_geom->bheight_uv);
3530 
3531                     dst_16bit = (uint16_t *)(recon_buffer_16bit->buffer_cr) +
3532                         (pred_buf_x_offest + recon_buffer_16bit->origin_x / 2 +
3533                         (pred_buf_y_offest + recon_buffer_16bit->origin_y / 2) *
3534                             recon_buffer_16bit->stride_cr);
3535                     dst_stride_16bit = recon_buffer_16bit->stride_cr;
3536                     dst = recon_buffer_8bit->buffer_cr + pred_buf_x_offest +
3537                         recon_buffer_8bit->origin_x / 2 +
3538                         (pred_buf_y_offest + recon_buffer_8bit->origin_y / 2) *
3539                         recon_buffer_8bit->stride_cr;
3540                     dst_stride = recon_buffer_8bit->stride_cr;
3541 
3542 
3543                     svt_convert_16bit_to_8bit(dst_16bit,
3544                         dst_stride_16bit,
3545                         dst,
3546                         dst_stride,
3547                         context_ptr->blk_geom->bwidth_uv,
3548                         context_ptr->blk_geom->bheight_uv);
3549                 }
3550                 update_mi_map_skip_settings(blk_ptr);
3551                 if (pcs_ptr->cdf_ctrl.update_se) {
3552                     // Update the partition Neighbor Array
3553                     PartitionContext partition;
3554                     partition.above = partition_context_lookup[blk_geom->bsize].above;
3555                     partition.left  = partition_context_lookup[blk_geom->bsize].left;
3556 
3557                     neighbor_array_unit_mode_write(pcs_ptr->ep_partition_context_neighbor_array[tile_idx],
3558                                                    (uint8_t *)&partition,
3559                                                    context_ptr->blk_origin_x,
3560                                                    context_ptr->blk_origin_y,
3561                                                    blk_geom->bwidth,
3562                                                    blk_geom->bheight,
3563                                                    NEIGHBOR_ARRAY_UNIT_TOP_AND_LEFT_ONLY_MASK);
3564 
3565                     // Update the CDFs based on the current block
3566                     blk_ptr->av1xd->tile_ctx = &pcs_ptr->ec_ctx_array[sb_addr];
3567                     update_stats(pcs_ptr,
3568                                  blk_ptr,
3569                                  context_ptr->blk_origin_y >> MI_SIZE_LOG2,
3570                                  context_ptr->blk_origin_x >> MI_SIZE_LOG2);
3571                 }
3572 
3573                 if (dlf_enable_flag) {}
3574 
3575                 {{// Set the PU Loop Variables
3576                   pu_ptr = blk_ptr->prediction_unit_array;
3577                 // Set MvUnit
3578                 context_ptr->mv_unit.pred_direction = (uint8_t)pu_ptr->inter_pred_direction_index;
3579                 context_ptr->mv_unit.mv[REF_LIST_0].mv_union = pu_ptr->mv[REF_LIST_0].mv_union;
3580                 context_ptr->mv_unit.mv[REF_LIST_1].mv_union = pu_ptr->mv[REF_LIST_1].mv_union;
3581             }
3582         }
3583 
3584         {
3585             sb_ptr->final_blk_arr[final_blk_itr].av1xd = sb_ptr->av1xd;
3586             BlkStruct *src_cu = &context_ptr->md_context->md_blk_arr_nsq[d1_itr];
3587             BlkStruct *dst_cu = &sb_ptr->final_blk_arr[final_blk_itr++];
3588             move_blk_data(pcs_ptr, context_ptr, src_cu, dst_cu);
3589         }
3590         if (scs_ptr->mfmv_enabled && pcs_ptr->slice_type != I_SLICE &&
3591             pcs_ptr->parent_pcs_ptr->is_used_as_reference_flag) {
3592             uint32_t           mi_stride = pcs_ptr->mi_stride;
3593             int32_t            mi_row    = context_ptr->blk_origin_y >> MI_SIZE_LOG2;
3594             int32_t            mi_col    = context_ptr->blk_origin_x >> MI_SIZE_LOG2;
3595             const int32_t      offset    = mi_row * mi_stride + mi_col;
3596             ModeInfo *         mi_ptr    = *(pcs_ptr->mi_grid_base + offset);
3597             const int          x_mis     = AOMMIN(context_ptr->blk_geom->bwidth >> MI_SIZE_LOG2,
3598                                      pcs_ptr->parent_pcs_ptr->av1_cm->mi_cols - mi_col);
3599             const int          y_mis     = AOMMIN(context_ptr->blk_geom->bheight >> MI_SIZE_LOG2,
3600                                      pcs_ptr->parent_pcs_ptr->av1_cm->mi_rows - mi_row);
3601             EbReferenceObject *obj_l0 =
3602                 (EbReferenceObject *)
3603                     pcs_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr;
3604 
3605             av1_copy_frame_mvs(pcs_ptr,
3606                                pcs_ptr->parent_pcs_ptr->av1_cm,
3607                                mi_ptr->mbmi,
3608                                mi_row,
3609                                mi_col,
3610                                x_mis,
3611                                y_mis,
3612                                obj_l0);
3613         }
3614     }
3615     blk_it +=
3616         ns_depth_offset[scs_ptr->seq_header.sb_size == BLOCK_128X128][context_ptr->blk_geom->depth];
3617 }
3618 else blk_it +=
3619     d1_depth_offset[scs_ptr->seq_header.sb_size == BLOCK_128X128][context_ptr->blk_geom->depth];
3620 } // CU Loop
3621 // First Pass Deblocking
3622     if (dlf_enable_flag && pcs_ptr->parent_pcs_ptr->loop_filter_mode == 1 && total_tile_cnt == 1) {
3623         //Jing: Doesn't work for tile_parallel since the SB of the bottom tile comes earlier than the bottom SB of the top tile
3624     if (pcs_ptr->parent_pcs_ptr->frm_hdr.loop_filter_params.filter_level[0] ||
3625         pcs_ptr->parent_pcs_ptr->frm_hdr.loop_filter_params.filter_level[1]) {
3626         uint8_t last_col =
3627             ((sb_origin_x) + sb_width == pcs_ptr->parent_pcs_ptr->aligned_width) ? 1 : 0;
3628         loop_filter_sb(recon_buffer, pcs_ptr, sb_origin_y >> 2, sb_origin_x >> 2, 0, 3, last_col);
3629     }
3630 }
3631 
3632 return;
3633 }
3634 
3635 #if NO_ENCDEC
no_enc_dec_pass(SequenceControlSet * scs_ptr,PictureControlSet * pcs_ptr,SuperBlock * sb_ptr,uint32_t sb_addr,uint32_t sb_origin_x,uint32_t sb_origin_y,uint32_t sb_qp,EncDecContext * context_ptr)3636 EB_EXTERN void no_enc_dec_pass(SequenceControlSet *scs_ptr, PictureControlSet *pcs_ptr,
3637                                SuperBlock *sb_ptr, uint32_t sb_addr, uint32_t sb_origin_x,
3638                                uint32_t sb_origin_y, uint32_t sb_qp, EncDecContext *context_ptr) {
3639     context_ptr->coded_area_sb    = 0;
3640     context_ptr->coded_area_sb_uv = 0;
3641 
3642     uint32_t final_blk_itr = 0;
3643 
3644     uint32_t blk_it = 0;
3645 
3646     while (blk_it < scs_ptr->max_block_cnt) {
3647         BlkStruct *blk_ptr = context_ptr->blk_ptr =
3648             &context_ptr->md_context->md_blk_arr_nsq[blk_it];
3649         PartitionType    part     = blk_ptr->part;
3650         const BlockGeom *blk_geom = context_ptr->blk_geom = get_blk_geom_mds(blk_it);
3651 
3652         sb_ptr->cu_partition_array[blk_it] = context_ptr->md_context->md_blk_arr_nsq[blk_it].part;
3653 
3654         if (part != PARTITION_SPLIT) {
3655             int32_t offset_d1 = ns_blk_offset[(int32_t)part]; //blk_ptr->best_d1_blk; // TOCKECK
3656             int32_t num_d1_block =
3657                 ns_blk_num[(int32_t)part]; // context_ptr->blk_geom->totns; // TOCKECK
3658 
3659             for (int32_t d1_itr = blk_it + offset_d1; d1_itr < blk_it + offset_d1 + num_d1_block;
3660                  d1_itr++) {
3661                 const BlockGeom *blk_geom = context_ptr->blk_geom = get_blk_geom_mds(d1_itr);
3662                 BlkStruct *     blk_ptr                          = context_ptr->blk_ptr =
3663                     &context_ptr->md_context->md_blk_arr_nsq[d1_itr];
3664 
3665                 blk_ptr->delta_qp     = 0;
3666                 blk_ptr->qp           = pcs_ptr->picture_qp;
3667                 sb_ptr->qp            = pcs_ptr->picture_qp;
3668 
3669                 {
3670                     BlkStruct *src_cu = &context_ptr->md_context->md_blk_arr_nsq[d1_itr];
3671                     BlkStruct *dst_cu = &sb_ptr->final_blk_arr[final_blk_itr++];
3672 
3673                     move_blk_data(src_cu, dst_cu);
3674                 }
3675 
3676                 //copy coeff
3677                 int32_t txb_1d_offset = 0, txb_1d_offset_uv = 0;
3678 
3679                 int32_t txb_itr = 0;
3680                 do {
3681                     uint32_t bwidth = context_ptr->blk_geom->tx_width[txb_itr] < 64
3682                                           ? context_ptr->blk_geom->tx_width[txb_itr]
3683                                           : 32;
3684                     uint32_t bheight = context_ptr->blk_geom->tx_height[txb_itr] < 64
3685                                            ? context_ptr->blk_geom->tx_height[txb_itr]
3686                                            : 32;
3687 
3688                     int32_t *src_ptr =
3689                         &(((int32_t *)context_ptr->blk_ptr->coeff_tmp->buffer_y)[txb_1d_offset]);
3690                     int32_t *dst_ptr = &(
3691                         ((int32_t *)sb_ptr->quantized_coeff->buffer_y)[context_ptr->coded_area_sb]);
3692 
3693                     uint32_t j;
3694                     for (j = 0; j < bheight; j++)
3695                         svt_memcpy(
3696                             dst_ptr + j * bwidth, src_ptr + j * bwidth, bwidth * sizeof(int32_t));
3697                     if (context_ptr->blk_geom->has_uv) {
3698                         // Cb
3699                         bwidth  = context_ptr->blk_geom->tx_width_uv[txb_itr];
3700                         bheight = context_ptr->blk_geom->tx_height_uv[txb_itr];
3701 
3702                         src_ptr =
3703                             &(((int32_t *)
3704                                    context_ptr->blk_ptr->coeff_tmp->buffer_cb)[txb_1d_offset_uv]);
3705                         dst_ptr = &(((int32_t *)sb_ptr->quantized_coeff
3706                                          ->buffer_cb)[context_ptr->coded_area_sb_uv]);
3707 
3708                         for (j = 0; j < bheight; j++)
3709                             svt_memcpy(dst_ptr + j * bwidth,
3710                                    src_ptr + j * bwidth,
3711                                    bwidth * sizeof(int32_t));
3712                         //Cr
3713                         src_ptr =
3714                             &(((int32_t *)
3715                                    context_ptr->blk_ptr->coeff_tmp->buffer_cr)[txb_1d_offset_uv]);
3716                         dst_ptr = &(((int32_t *)sb_ptr->quantized_coeff
3717                                          ->buffer_cr)[context_ptr->coded_area_sb_uv]);
3718 
3719                         for (j = 0; j < bheight; j++)
3720                             svt_memcpy(dst_ptr + j * bwidth,
3721                                    src_ptr + j * bwidth,
3722                                    bwidth * sizeof(int32_t));
3723                     }
3724 
3725                     context_ptr->coded_area_sb += context_ptr->blk_geom->tx_width[txb_itr] *
3726                                                   context_ptr->blk_geom->tx_height[txb_itr];
3727                     if (context_ptr->blk_geom->has_uv)
3728                         context_ptr->coded_area_sb_uv +=
3729                             context_ptr->blk_geom->tx_width_uv[txb_itr] *
3730                             context_ptr->blk_geom->tx_height_uv[txb_itr];
3731 
3732                     txb_1d_offset += context_ptr->blk_geom->tx_width[txb_itr] *
3733                                      context_ptr->blk_geom->tx_height[txb_itr];
3734                     if (context_ptr->blk_geom->has_uv)
3735                         txb_1d_offset_uv += context_ptr->blk_geom->tx_width_uv[txb_itr] *
3736                                             context_ptr->blk_geom->tx_height_uv[txb_itr];
3737 
3738                     txb_itr++;
3739                 } while (txb_itr < context_ptr->blk_geom->txb_count);
3740 
3741                 //copy recon
3742                 {
3743                     EbPictureBufferDesc *ref_pic;
3744                     if (pcs_ptr->parent_pcs_ptr->is_used_as_reference_flag) {
3745                         EbReferenceObject *ref_obj =
3746                             (EbReferenceObject *)
3747                                 pcs_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr;
3748                         ref_pic = ref_obj->reference_picture;
3749                     } else
3750                         ref_pic = pcs_ptr->recon_picture_ptr;
3751                     context_ptr->blk_origin_x = sb_origin_x + context_ptr->blk_geom->origin_x;
3752                     context_ptr->blk_origin_y = sb_origin_y + context_ptr->blk_geom->origin_y;
3753 
3754                     uint32_t bwidth  = context_ptr->blk_geom->bwidth;
3755                     uint32_t bheight = context_ptr->blk_geom->bheight;
3756 
3757                     uint8_t *src_ptr = &(((uint8_t *)context_ptr->blk_ptr->recon_tmp->buffer_y)[0]);
3758                     uint8_t *dst_ptr =
3759                         ref_pic->buffer_y + ref_pic->origin_x + context_ptr->blk_origin_x +
3760                         (ref_pic->origin_y + context_ptr->blk_origin_y) * ref_pic->stride_y;
3761 
3762                     uint32_t j;
3763                     for (j = 0; j < bheight; j++)
3764                         svt_memcpy(dst_ptr + j * ref_pic->stride_y,
3765                                src_ptr + j * 128,
3766                                bwidth * sizeof(uint8_t));
3767                     if (context_ptr->blk_geom->has_uv) {
3768                         bwidth  = context_ptr->blk_geom->bwidth_uv;
3769                         bheight = context_ptr->blk_geom->bheight_uv;
3770 
3771                         src_ptr = &(((uint8_t *)context_ptr->blk_ptr->recon_tmp->buffer_cb)[0]);
3772 
3773                         dst_ptr =
3774                             ref_pic->buffer_cb + ref_pic->origin_x / 2 +
3775                             ((context_ptr->blk_origin_x >> 3) << 3) / 2 +
3776                             (ref_pic->origin_y / 2 + ((context_ptr->blk_origin_y >> 3) << 3) / 2) *
3777                                 ref_pic->stride_cb;
3778 
3779                         for (j = 0; j < bheight; j++)
3780                             svt_memcpy(dst_ptr + j * ref_pic->stride_cb,
3781                                    src_ptr + j * 64,
3782                                    bwidth * sizeof(uint8_t));
3783                         src_ptr = &(((uint8_t *)context_ptr->blk_ptr->recon_tmp->buffer_cr)[0]);
3784 
3785                         dst_ptr =
3786                             ref_pic->buffer_cr + ref_pic->origin_x / 2 +
3787                             ((context_ptr->blk_origin_x >> 3) << 3) / 2 +
3788                             (ref_pic->origin_y / 2 + ((context_ptr->blk_origin_y >> 3) << 3) / 2) *
3789                                 ref_pic->stride_cr;
3790 
3791                         for (j = 0; j < bheight; j++)
3792                             svt_memcpy(dst_ptr + j * ref_pic->stride_cr,
3793                                    src_ptr + j * 64,
3794                                    bwidth * sizeof(uint8_t));
3795                     }
3796                 }
3797             }
3798             blk_it +=
3799                 ns_depth_offset[scs_ptr->sb_size == BLOCK_128X128][context_ptr->blk_geom->depth];
3800         } else
3801             blk_it +=
3802                 d1_depth_offset[scs_ptr->sb_size == BLOCK_128X128][context_ptr->blk_geom->depth];
3803     } // CU Loop
3804 
3805     return;
3806 }
3807 #endif
3808