1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10  */
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <math.h>
14 #include <string.h>
15 
16 #include "EbEncCdef.h"
17 #include <stdint.h>
18 #include "aom_dsp_rtcd.h"
19 #include "EbLog.h"
20 
21 extern int16_t svt_av1_ac_quant_q3(int32_t qindex, int32_t delta, AomBitDepth bit_depth);
22 
23 #include "EbRateDistortionCost.h"
24 
dist_8x8_16bit_c(const uint16_t * src,const uint16_t * dst,const int32_t dstride,const int32_t coeff_shift)25 static INLINE uint64_t dist_8x8_16bit_c(const uint16_t *src, const uint16_t *dst,
26                                         const int32_t dstride, const int32_t coeff_shift) {
27     uint64_t svar   = 0;
28     uint64_t dvar   = 0;
29     uint64_t sum_s  = 0;
30     uint64_t sum_d  = 0;
31     uint64_t sum_s2 = 0;
32     uint64_t sum_d2 = 0;
33     uint64_t sum_sd = 0;
34     int32_t  i, j;
35     for (i = 0; i < 8; i++) {
36         for (j = 0; j < 8; j++) {
37             sum_s += src[8 * i + j];
38             sum_d += dst[i * dstride + j];
39             sum_s2 += src[8 * i + j] * src[8 * i + j];
40             sum_d2 += dst[i * dstride + j] * dst[i * dstride + j];
41             sum_sd += src[8 * i + j] * dst[i * dstride + j];
42         }
43     }
44     /* Compute the variance -- the calculation cannot go negative. */
45     svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
46     dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);
47     return (uint64_t)floor(.5 +
48                            (sum_d2 + sum_s2 - 2 * sum_sd) * .5 *
49                                (svar + dvar + (400 << 2 * coeff_shift)) /
50                                (sqrt((20000 << 4 * coeff_shift) + svar * (double)dvar)));
51 }
52 
/*
 * Sum of squared differences over an 8-sample-wide block of 16-bit samples.
 * Despite the name, the result is not divided by the sample count.
 *
 *   src     - block stored contiguously (row stride of 8 samples).
 *   dst     - top-left sample of the block it is compared against.
 *   dstride - row stride of dst, in samples.
 *   height  - number of rows to accumulate; a value <= 0 yields 0.
 */
static INLINE uint64_t mse_8_16bit(const uint16_t *src, const uint16_t *dst, const int32_t dstride,
                                   const int32_t height) {
    uint64_t sum = 0;
    for (int32_t i = 0; i < height; i++) {
        for (int32_t j = 0; j < 8; j++) {
            /* The difference of two uint16_t values can reach +/-65535, whose
               square does not fit in a 32-bit signed int; square it in 64 bits. */
            const int64_t e = (int64_t)dst[i * dstride + j] - (int64_t)src[8 * i + j];
            sum += (uint64_t)(e * e);
        }
    }
    return sum;
}
65 
/*
 * Sum of squared differences over a 4-sample-wide block of 16-bit samples.
 * Despite the name, the result is not divided by the sample count.
 *
 *   src     - block stored contiguously (row stride of 4 samples).
 *   dst     - top-left sample of the block it is compared against.
 *   dstride - row stride of dst, in samples.
 *   height  - number of rows to accumulate; a value <= 0 yields 0.
 */
static INLINE uint64_t mse_4_16bit_c(const uint16_t *src, const uint16_t *dst,
                                     const int32_t dstride, const int32_t height) {
    uint64_t sum = 0;
    for (int32_t i = 0; i < height; i++) {
        for (int32_t j = 0; j < 4; j++) {
            /* The difference of two uint16_t values can reach +/-65535, whose
               square does not fit in a 32-bit signed int; square it in 64 bits. */
            const int64_t e = (int64_t)dst[i * dstride + j] - (int64_t)src[4 * i + j];
            sum += (uint64_t)(e * e);
        }
    }
    return sum;
}
78 
dist_8x8_8bit_c(const uint8_t * src,const uint8_t * dst,const int32_t dstride,const int32_t coeff_shift)79 static INLINE uint64_t dist_8x8_8bit_c(const uint8_t *src, const uint8_t *dst,
80                                        const int32_t dstride, const int32_t coeff_shift) {
81     uint64_t svar   = 0;
82     uint64_t dvar   = 0;
83     uint64_t sum_s  = 0;
84     uint64_t sum_d  = 0;
85     uint64_t sum_s2 = 0;
86     uint64_t sum_d2 = 0;
87     uint64_t sum_sd = 0;
88     int32_t  i, j;
89     for (i = 0; i < 8; i++) {
90         for (j = 0; j < 8; j++) {
91             sum_s += src[8 * i + j];
92             sum_d += dst[i * dstride + j];
93             sum_s2 += src[8 * i + j] * src[8 * i + j];
94             sum_d2 += dst[i * dstride + j] * dst[i * dstride + j];
95             sum_sd += src[8 * i + j] * dst[i * dstride + j];
96         }
97     }
98     /* Compute the variance -- the calculation cannot go negative. */
99     svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
100     dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);
101     return (uint64_t)floor(.5 +
102                            (sum_d2 + sum_s2 - 2 * sum_sd) * .5 *
103                                (svar + dvar + (400 << 2 * coeff_shift)) /
104                                (sqrt((20000 << 4 * coeff_shift) + svar * (double)dvar)));
105 }
106 
/*
 * Sum of squared differences over an 8-sample-wide block of 8-bit samples.
 * Despite the name, the result is not divided by the sample count.
 *
 *   src     - block stored contiguously (row stride of 8 samples).
 *   dst     - top-left sample of the block it is compared against.
 *   dstride - row stride of dst, in samples.
 *   height  - number of rows to accumulate; a value <= 0 yields 0.
 */
static INLINE uint64_t mse_8_8bit(const uint8_t *src, const uint8_t *dst, const int32_t dstride,
                                  const int32_t height) {
    uint64_t acc = 0;
    for (int32_t row = 0; row < height; ++row) {
        const uint8_t *src_row = &src[8 * row];
        const uint8_t *dst_row = &dst[row * dstride];
        for (int32_t col = 0; col < 8; ++col) {
            const int32_t diff = dst_row[col] - src_row[col];
            acc += diff * diff;
        }
    }
    return acc;
}
119 
/*
 * Sum of squared differences over a 4-sample-wide block of 8-bit samples.
 * Despite the name, the result is not divided by the sample count.
 *
 *   src     - block stored contiguously (row stride of 4 samples).
 *   dst     - top-left sample of the block it is compared against.
 *   dstride - row stride of dst, in samples.
 *   height  - number of rows to accumulate; a value <= 0 yields 0.
 */
static INLINE uint64_t mse_4_8bit_c(const uint8_t *src, const uint8_t *dst, const int32_t dstride,
                                    const int32_t height) {
    uint64_t acc = 0;
    for (int32_t row = 0; row < height; ++row) {
        const uint8_t *src_row = &src[4 * row];
        const uint8_t *dst_row = &dst[row * dstride];
        for (int32_t col = 0; col < 4; ++col) {
            const int32_t diff = dst_row[col] - src_row[col];
            acc += diff * diff;
        }
    }
    return acc;
}
132 
133 /* Compute MSE only on the blocks we filtered. */
compute_cdef_dist_c(const uint16_t * dst,int32_t dstride,const uint16_t * src,const CdefList * dlist,int32_t cdef_count,BlockSize bsize,int32_t coeff_shift,int32_t pli)134 uint64_t compute_cdef_dist_c(const uint16_t *dst, int32_t dstride, const uint16_t *src,
135                              const CdefList *dlist, int32_t cdef_count, BlockSize bsize,
136                              int32_t coeff_shift, int32_t pli) {
137     uint64_t sum = 0;
138     int32_t  bi, bx, by;
139     if (bsize == BLOCK_8X8) {
140         for (bi = 0; bi < cdef_count; bi++) {
141             by = dlist[bi].by;
142             bx = dlist[bi].bx;
143             if (pli == 0) {
144                 sum += dist_8x8_16bit_c(&src[bi << (3 + 3)],
145                                         &dst[(by << 3) * dstride + (bx << 3)],
146                                         dstride,
147                                         coeff_shift);
148             } else
149                 sum += mse_8_16bit(
150                     &src[bi << (3 + 3)], &dst[(by << 3) * dstride + (bx << 3)], dstride, 8);
151         }
152     } else if (bsize == BLOCK_4X8) {
153         for (bi = 0; bi < cdef_count; bi++) {
154             by = dlist[bi].by;
155             bx = dlist[bi].bx;
156             sum += mse_4_16bit_c(
157                 &src[bi << (3 + 2)], &dst[(by << 3) * dstride + (bx << 2)], dstride, 8);
158         }
159     } else if (bsize == BLOCK_8X4) {
160         for (bi = 0; bi < cdef_count; bi++) {
161             by = dlist[bi].by;
162             bx = dlist[bi].bx;
163             sum += mse_8_16bit(
164                 &src[bi << (2 + 3)], &dst[(by << 2) * dstride + (bx << 3)], dstride, 4);
165         }
166     } else {
167         assert(bsize == BLOCK_4X4);
168         for (bi = 0; bi < cdef_count; bi++) {
169             by = dlist[bi].by;
170             bx = dlist[bi].bx;
171             sum += mse_4_16bit_c(
172                 &src[bi << (2 + 2)], &dst[(by << 2) * dstride + (bx << 2)], dstride, 4);
173         }
174     }
175     return sum >> 2 * coeff_shift;
176 }
177 
/*
 * Distortion between `dst8` and `src8`, accumulated only over the blocks
 * listed in `dlist` (8-bit samples).
 *
 *   dst8 / dstride - picture-layout buffer; block n sits at block coordinates
 *                    (dlist[n].by, dlist[n].bx), in units of the block size.
 *   src8           - the listed blocks stored back to back, block n starting
 *                    at n * (block width * block height) samples.
 *   cdef_count     - number of entries of dlist to visit.
 *   bsize          - BLOCK_8X8, BLOCK_4X8, BLOCK_8X4 or BLOCK_4X4.
 *   pli            - plane index; for 8x8 blocks plane 0 uses the weighted
 *                    dist_8x8 metric, every other case uses plain SSE.
 *
 * The accumulated value is shifted right by 2 * coeff_shift before returning.
 */
uint64_t compute_cdef_dist_8bit_c(const uint8_t *dst8, int32_t dstride, const uint8_t *src8,
                                  const CdefList *dlist, int32_t cdef_count, BlockSize bsize,
                                  int32_t coeff_shift, int32_t pli) {
    uint64_t total = 0;

    switch (bsize) {
    case BLOCK_8X8:
        for (int32_t n = 0; n < cdef_count; ++n) {
            const int32_t  row     = dlist[n].by;
            const int32_t  col     = dlist[n].bx;
            const uint8_t *blk_src = &src8[n << (3 + 3)];
            const uint8_t *blk_dst = &dst8[(row << 3) * dstride + (col << 3)];
            total += (pli == 0) ? dist_8x8_8bit_c(blk_src, blk_dst, dstride, coeff_shift)
                                : mse_8_8bit(blk_src, blk_dst, dstride, 8);
        }
        break;

    case BLOCK_4X8:
        for (int32_t n = 0; n < cdef_count; ++n) {
            const int32_t row = dlist[n].by;
            const int32_t col = dlist[n].bx;
            total += mse_4_8bit_c(
                &src8[n << (3 + 2)], &dst8[(row << 3) * dstride + (col << 2)], dstride, 8);
        }
        break;

    case BLOCK_8X4:
        for (int32_t n = 0; n < cdef_count; ++n) {
            const int32_t row = dlist[n].by;
            const int32_t col = dlist[n].bx;
            total += mse_8_8bit(
                &src8[n << (2 + 3)], &dst8[(row << 2) * dstride + (col << 3)], dstride, 4);
        }
        break;

    default:
        /* Anything else is expected to be 4x4. */
        assert(bsize == BLOCK_4X4);
        for (int32_t n = 0; n < cdef_count; ++n) {
            const int32_t row = dlist[n].by;
            const int32_t col = dlist[n].bx;
            total += mse_4_8bit_c(
                &src8[n << (2 + 2)], &dst8[(row << 2) * dstride + (col << 2)], dstride, 4);
        }
        break;
    }

    return total >> 2 * coeff_shift;
}
221 
/*
 * Returns 1 when every mode-info unit of the 64x64 area whose top-left unit is
 * (mi_row, mi_col) has its skip flag set, 0 otherwise. The area is clipped to
 * cm->mi_rows / cm->mi_cols; an empty area yields 1.
 */
int32_t svt_sb_all_skip(PictureControlSet *pcs_ptr, const Av1Common *const cm, int32_t mi_row,
                        int32_t mi_col) {
    const int32_t rows_left = cm->mi_rows - mi_row;
    const int32_t cols_left = cm->mi_cols - mi_col;
    const int32_t num_rows  = AOMMIN(rows_left, MI_SIZE_64X64);
    const int32_t num_cols  = AOMMIN(cols_left, MI_SIZE_64X64);

    for (int32_t r = 0; r < num_rows; ++r) {
        for (int32_t c = 0; c < num_cols; ++c) {
            /* The first non-skipped unit settles the answer. */
            if (!pcs_ptr->mi_grid_base[(mi_row + r) * pcs_ptr->mi_stride + mi_col + c]
                     ->mbmi.block_mi.skip)
                return 0;
        }
    }
    return 1;
}
241 
/*
 * ANDs together the skip flags of all mode-info units covered by the 8x8 block
 * whose top-left unit is (mi_row, mi_col) in `grid` (row stride `mi_stride`).
 * Every covered unit is read; the result is non-zero only if all flags are.
 */
static int32_t is_8x8_block_skip(ModeInfo **grid, int32_t mi_row, int32_t mi_col,
                                 int32_t mi_stride) {
    const int32_t units_high = mi_size_high[BLOCK_8X8];
    const int32_t units_wide = mi_size_wide[BLOCK_8X8];
    int32_t       all_skip   = 1;

    for (int32_t dr = 0; dr < units_high; ++dr) {
        for (int32_t dc = 0; dc < units_wide; ++dc) {
            const int32_t pos = (mi_row + dr) * mi_stride + (mi_col + dc);
            all_skip &= (int32_t)(grid[pos]->mbmi.block_mi.skip);
        }
    }
    return all_skip;
}
251 
/*
 * Builds the list of 8x8 blocks inside one filter block that are not entirely
 * skipped.
 *
 *   mi_row, mi_col - top-left mode-info unit of the filter block.
 *   dlist          - output; entry k receives the 8x8-block coordinates
 *                    (by, bx) relative to the filter block and skip = 0.
 *                    The caller must provide room for every 8x8 block of the
 *                    area.
 *   bs             - filter block size; the scanned area is 128 units wide for
 *                    BLOCK_128X128 / BLOCK_128X64 and 128 high for
 *                    BLOCK_128X128 / BLOCK_64X128, otherwise 64, and is
 *                    clipped to cm->mi_cols / cm->mi_rows.
 *
 * Returns the number of entries written.
 */
int32_t svt_sb_compute_cdef_list(PictureControlSet *pcs_ptr, const Av1Common *const cm,
                                 int32_t mi_row, int32_t mi_col, CdefList *dlist, BlockSize bs) {
    ModeInfo **grid = pcs_ptr->mi_grid_base;

    const int32_t is_wide128 = (bs == BLOCK_128X128 || bs == BLOCK_128X64);
    const int32_t is_tall128 = (bs == BLOCK_128X128 || bs == BLOCK_64X128);

    int32_t cols = cm->mi_cols - mi_col;
    int32_t rows = cm->mi_rows - mi_row;
    if (is_wide128)
        cols = AOMMIN(cols, MI_SIZE_128X128);
    else
        cols = AOMMIN(cols, MI_SIZE_64X64);
    if (is_tall128)
        rows = AOMMIN(rows, MI_SIZE_128X128);
    else
        rows = AOMMIN(rows, MI_SIZE_64X64);

    /* Mode-info units per 8x8 block; the shift converts a unit offset into an
       8x8-block coordinate. */
    const int32_t row_step  = mi_size_high[BLOCK_8X8];
    const int32_t col_step  = mi_size_wide[BLOCK_8X8];
    const int32_t row_shift = (row_step == 2);
    const int32_t col_shift = (col_step == 2);

    assert(row_step == 1 || row_step == 2);
    assert(col_step == 1 || col_step == 2);

    int32_t num_blocks = 0;
    for (int32_t r = 0; r < rows; r += row_step) {
        for (int32_t c = 0; c < cols; c += col_step) {
            if (is_8x8_block_skip(grid, mi_row + r, mi_col + c, pcs_ptr->mi_stride))
                continue;

            CdefList *entry = &dlist[num_blocks];
            entry->by       = (uint8_t)(r >> row_shift);
            entry->bx       = (uint8_t)(c >> col_shift);
            entry->skip     = 0;
            ++num_blocks;
        }
    }
    return num_blocks;
}
291 
/*
 * Runs CDEF over the 8-bit reconstructed picture of `pCs`, one 64x64 filter
 * block at a time, writing the filtered samples back into the reconstruction
 * buffer.
 *
 *   context_ptr - unused.
 *   scs_ptr     - supplies the colour configuration (plane count) and the
 *                 encoder bit depth.
 *   pCs         - picture being filtered; its mode-info grid gives the
 *                 per-filter-block CDEF strength index and the skip flags.
 *
 * For every filter block the function assembles a padded 16-bit copy of the
 * block in `src` (the block itself plus a CDEF_VBORDER / CDEF_HBORDER frame)
 * and passes it to svt_cdef_filter_fb(). Border samples come from:
 *   - the reconstruction buffer, where the neighbouring block was not filtered;
 *   - `linebuf` (rows saved above) / `colbuf` (columns saved on the left),
 *     holding pre-filter samples of neighbours that were already filtered;
 *   - CDEF_VERY_LARGE, on frame edges.
 *
 * If a scratch allocation fails, the error is logged and the picture is left
 * unfiltered.
 */
void svt_av1_cdef_frame(EncDecContext *context_ptr, SequenceControlSet *scs_ptr,
                        PictureControlSet *pCs) {
    (void)context_ptr;

    struct PictureParentControlSet *ppcs    = pCs->parent_pcs_ptr;
    Av1Common *                     cm      = ppcs->av1_cm;
    FrameHeader *                   frm_hdr = &ppcs->frm_hdr;

    /* Reference frames are filtered in the reference picture buffer, all
       others in the encode/decode reconstruction buffer. */
    EbPictureBufferDesc *recon_picture_ptr;
    if (ppcs->is_used_as_reference_flag == EB_TRUE)
        recon_picture_ptr =
            ((EbReferenceObject *)ppcs->reference_picture_wrapper_ptr->object_ptr)
                ->reference_picture;
    else
        recon_picture_ptr = ppcs->enc_dec_ptr->recon_picture_ptr;

    /* Plane start past the origin_x / origin_y offset; the chroma offsets are
       halved, matching the fixed 2x2 chroma subsampling set up below. */
    EbByte recon_buffer_y = &(
        (recon_picture_ptr->buffer_y)[recon_picture_ptr->origin_x +
                                      recon_picture_ptr->origin_y * recon_picture_ptr->stride_y]);
    EbByte recon_buffer_cb = &((recon_picture_ptr->buffer_cb)[recon_picture_ptr->origin_x / 2 +
                                                              recon_picture_ptr->origin_y / 2 *
                                                                  recon_picture_ptr->stride_cb]);
    EbByte recon_buffer_cr = &((recon_picture_ptr->buffer_cr)[recon_picture_ptr->origin_x / 2 +
                                                              recon_picture_ptr->origin_y / 2 *
                                                                  recon_picture_ptr->stride_cr]);

    const int32_t num_planes = av1_num_planes(&scs_ptr->seq_header.color_config);
    DECLARE_ALIGNED(16, uint16_t, src[CDEF_INBUF_SIZE]);
    /* NULL-initialised so the cleanup path can run after a partial allocation. */
    uint16_t *linebuf[3] = {NULL, NULL, NULL};
    uint16_t *colbuf[3]  = {NULL, NULL, NULL};
    CdefList  dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
    uint8_t * row_cdef, *prev_row_cdef, *curr_row_cdef;
    int32_t   cdef_count;
    int32_t   dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = {{0}};
    int32_t   var[CDEF_NBLOCKS][CDEF_NBLOCKS] = {{0}};
    int32_t   mi_wide_l2[3];
    int32_t   mi_high_l2[3];
    int32_t   xdec[3];
    int32_t   ydec[3];
    int32_t coeff_shift = AOMMAX(scs_ptr->static_config.encoder_bit_depth /*cm->bit_depth*/ - 8, 0);
    /* Number of 64x64 filter blocks vertically / horizontally (rounded up). */
    const int32_t nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
    const int32_t nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;

    /* Two rows of "was this filter block filtered" flags (previous row and
       current row), each padded by one entry on both sides so that indices
       fbc - 1 and fbc + 1 are always valid. All flags start at 1. */
    row_cdef = (uint8_t *)svt_aom_malloc(sizeof(*row_cdef) * (nhfb + 2) * 2);
    if (row_cdef == NULL) {
        /* An assert alone disappears in NDEBUG builds and the memset below
           would then write through NULL. */
        SVT_LOG("CDEF ERROR: failed to allocate filter-block flags\n");
        return;
    }
    memset(row_cdef, 1, sizeof(*row_cdef) * (nhfb + 2) * 2);
    prev_row_cdef = row_cdef + 1;
    curr_row_cdef = prev_row_cdef + nhfb + 2;

    for (int32_t pli = 0; pli < num_planes; pli++) {
        /* Chroma planes are treated as subsampled by 2 in both directions. */
        int32_t subsampling_x = (pli == 0) ? 0 : 1;
        int32_t subsampling_y = (pli == 0) ? 0 : 1;

        xdec[pli]       = subsampling_x;
        ydec[pli]       = subsampling_y;
        mi_wide_l2[pli] = MI_SIZE_LOG2 - subsampling_x;
        mi_high_l2[pli] = MI_SIZE_LOG2 - subsampling_y;
    }

    /* Row stride of linebuf, in samples: the full luma width plus borders. */
    const int32_t stride = (cm->mi_cols << MI_SIZE_LOG2) + 2 * CDEF_HBORDER;
    for (int32_t pli = 0; pli < num_planes; pli++) {
        /* NOTE(review): sizeof(*linebuf) / sizeof(*colbuf) is the size of a
           pointer, not of a uint16_t sample, so these buffers look larger than
           the indexing below needs. Left unchanged because the copy helpers
           are not visible here -- confirm before shrinking. */
        linebuf[pli] = (uint16_t *)svt_aom_malloc(sizeof(*linebuf) * CDEF_VBORDER * stride);
        colbuf[pli]  = (uint16_t *)svt_aom_malloc(
            sizeof(*colbuf) * ((CDEF_BLOCKSIZE << mi_high_l2[pli]) + 2 * CDEF_VBORDER) *
            CDEF_HBORDER);
        if (linebuf[pli] == NULL || colbuf[pli] == NULL) {
            SVT_LOG("CDEF ERROR: failed to allocate border buffers\n");
            goto cleanup;
        }
    }

    for (int32_t fbr = 0; fbr < nvfb; fbr++) {
        /* Nothing has been saved for the left of the first block of a row. */
        for (int32_t pli = 0; pli < num_planes; pli++) {
            const int32_t block_height = (MI_SIZE_64X64 << mi_high_l2[pli]) + 2 * CDEF_VBORDER;
            fill_rect(colbuf[pli], CDEF_HBORDER, block_height, CDEF_HBORDER, CDEF_VERY_LARGE);
        }

        int32_t cdef_left = 1;
        for (int32_t fbc = 0; fbc < nhfb; fbc++) {
            int32_t level, sec_strength;
            int32_t uv_level, uv_sec_strength;
            int32_t nhb, nvb;
            int32_t cstart     = 0;
            curr_row_cdef[fbc] = 0;

            /* Mode info of the top-left unit of this filter block; it carries
               the CDEF strength index used for the whole block. */
            const ModeInfo *const mi_tl =
                pCs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc];

            /* A missing entry, or a strength index of -1, means there is
               nothing to filter with: report it and skip the block. */
            if (mi_tl == NULL || mi_tl->mbmi.cdef_strength == -1) {
                cdef_left = 0;
                SVT_LOG("\n\n\nCDEF ERROR: Skipping Current FB\n\n\n");
                continue;
            }

            /* If the left block has not been filtered, its samples in the
               reconstruction buffer can be used directly as the left border. */
            if (!cdef_left)
                cstart = -CDEF_HBORDER;

            /* Size of this filter block in mode-info units, clipped to the frame. */
            nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
            nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);

            const int32_t mi_row = MI_SIZE_64X64 * fbr;
            const int32_t mi_col = MI_SIZE_64X64 * fbc;

            /* Which sides of this filter block lie on the frame boundary. The
               last filter-block row / column is always on the bottom / right
               edge; otherwise the block touches the edge only when it ends
               exactly at mi_rows / mi_cols. */
            const int32_t frame_top  = (mi_row == 0) ? 1 : 0;
            const int32_t frame_left = (mi_col == 0) ? 1 : 0;
            int32_t       frame_bottom, frame_right;

            if (fbr != nvfb - 1)
                frame_bottom = (mi_row + MI_SIZE_64X64 == cm->mi_rows) ? 1 : 0;
            else
                frame_bottom = 1;

            if (fbc != nhfb - 1)
                frame_right = (mi_col + MI_SIZE_64X64 == cm->mi_cols) ? 1 : 0;
            else
                frame_right = 1;

            /* Split the packed luma / chroma strengths into their primary
               (level) and secondary parts; a secondary value of 3 is bumped
               to 4. */
            const int32_t mbmi_cdef_strength = mi_tl->mbmi.cdef_strength;
            level = frm_hdr->cdef_params.cdef_y_strength[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
            sec_strength = frm_hdr->cdef_params.cdef_y_strength[mbmi_cdef_strength] %
                CDEF_SEC_STRENGTHS;
            sec_strength += sec_strength == 3;
            uv_level = frm_hdr->cdef_params.cdef_uv_strength[mbmi_cdef_strength] /
                CDEF_SEC_STRENGTHS;
            uv_sec_strength = frm_hdr->cdef_params.cdef_uv_strength[mbmi_cdef_strength] %
                CDEF_SEC_STRENGTHS;
            uv_sec_strength += uv_sec_strength == 3;

            /* Nothing to do when all strengths are zero or when every 8x8
               block of the filter block is skipped. cdef_count is assigned
               only when the strength test does not short-circuit. */
            if ((level == 0 && sec_strength == 0 && uv_level == 0 && uv_sec_strength == 0) ||
                (cdef_count = svt_sb_compute_cdef_list(
                     pCs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, BLOCK_64X64)) == 0) {
                cdef_left = 0;
                continue;
            }

            curr_row_cdef[fbc] = 1;
            for (int32_t pli = 0; pli < num_planes; pli++) {
                int32_t coffset;
                int32_t rend, cend;
                int32_t pri_damping = frm_hdr->cdef_params.cdef_damping;
                int32_t sec_damping = pri_damping;
                int32_t hsize       = nhb << mi_wide_l2[pli];
                int32_t vsize       = nvb << mi_high_l2[pli];

                /* Chroma planes use the chroma strengths. */
                if (pli) {
                    level        = uv_level;
                    sec_strength = uv_sec_strength;
                }

                /* Extent copied from the reconstruction: the block plus, when
                   it is not the last column / row, the right / bottom border. */
                if (fbc == nhfb - 1)
                    cend = hsize;
                else
                    cend = hsize + CDEF_HBORDER;

                if (fbr == nvfb - 1)
                    rend = vsize;
                else
                    rend = vsize + CDEF_VBORDER;

                /* Horizontal sample offset of this filter block in the plane. */
                coffset = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
                if (fbc == nhfb - 1) {
                    /* On the last superblock column, fill in the right border with
                       CDEF_VERY_LARGE to avoid filtering with the outside. */
                    fill_rect(&src[cend + CDEF_HBORDER],
                              CDEF_BSTRIDE,
                              rend + CDEF_VBORDER,
                              hsize + CDEF_HBORDER - cend,
                              CDEF_VERY_LARGE);
                }
                if (fbr == nvfb - 1) {
                    /* On the last superblock row, fill in the bottom border with
                       CDEF_VERY_LARGE to avoid filtering with the outside. */
                    fill_rect(&src[(rend + CDEF_VBORDER) * CDEF_BSTRIDE],
                              CDEF_BSTRIDE,
                              CDEF_VBORDER,
                              hsize + 2 * CDEF_HBORDER,
                              CDEF_VERY_LARGE);
                }

                uint8_t *rec_buff   = 0;
                uint32_t rec_stride = 0;

                switch (pli) {
                case 0:
                    rec_buff   = recon_buffer_y;
                    rec_stride = recon_picture_ptr->stride_y;
                    break;
                case 1:
                    rec_buff   = recon_buffer_cb;
                    rec_stride = recon_picture_ptr->stride_cb;
                    break;
                case 2:
                    rec_buff   = recon_buffer_cr;
                    rec_stride = recon_picture_ptr->stride_cr;
                    break;
                default:
                    /* Not expected: the per-plane arrays hold three planes. */
                    break;
                }

                /* Copy in the pixels we need from the current superblock for
                   deringing.*/
                copy_sb8_16(&src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER + cstart],
                            CDEF_BSTRIDE,
                            rec_buff,
                            (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr,
                            coffset + cstart,
                            rec_stride,
                            rend,
                            cend - cstart);

                /* Top border: straight from the reconstruction if the block
                   above was not filtered, else from the rows saved in linebuf,
                   else (first row) marked as unavailable. */
                if (!prev_row_cdef[fbc]) {
                    copy_sb8_16(&src[CDEF_HBORDER],
                                CDEF_BSTRIDE,
                                rec_buff,
                                (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
                                coffset,
                                rec_stride,
                                CDEF_VBORDER,
                                hsize);
                } else if (fbr > 0) {
                    copy_rect(&src[CDEF_HBORDER],
                              CDEF_BSTRIDE,
                              &linebuf[pli][coffset],
                              stride,
                              CDEF_VBORDER,
                              hsize);
                } else {
                    fill_rect(
                        &src[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hsize, CDEF_VERY_LARGE);
                }

                /* Top-left corner, same three sources. Index fbc - 1 is valid
                   for fbc == 0 thanks to the padding of the flag rows. */
                if (!prev_row_cdef[fbc - 1]) {
                    copy_sb8_16(src,
                                CDEF_BSTRIDE,
                                rec_buff,
                                (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
                                coffset - CDEF_HBORDER,
                                rec_stride,
                                CDEF_VBORDER,
                                CDEF_HBORDER);
                } else if (fbr > 0 && fbc > 0) {
                    copy_rect(src,
                              CDEF_BSTRIDE,
                              &linebuf[pli][coffset - CDEF_HBORDER],
                              stride,
                              CDEF_VBORDER,
                              CDEF_HBORDER);
                } else {
                    fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
                }

                /* Top-right corner, same three sources. */
                if (!prev_row_cdef[fbc + 1]) {
                    copy_sb8_16(&src[CDEF_HBORDER + (nhb << mi_wide_l2[pli])],
                                CDEF_BSTRIDE,
                                rec_buff,
                                (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
                                coffset + hsize,
                                rec_stride,
                                CDEF_VBORDER,
                                CDEF_HBORDER);
                } else if (fbr > 0 && fbc < nhfb - 1) {
                    copy_rect(&src[hsize + CDEF_HBORDER],
                              CDEF_BSTRIDE,
                              &linebuf[pli][coffset + hsize],
                              stride,
                              CDEF_VBORDER,
                              CDEF_HBORDER);
                } else {
                    fill_rect(&src[hsize + CDEF_HBORDER],
                              CDEF_BSTRIDE,
                              CDEF_VBORDER,
                              CDEF_HBORDER,
                              CDEF_VERY_LARGE);
                }

                if (cdef_left) {
                    /* If we deringed the superblock on the left then we need to copy in
                       saved pixels. */
                    copy_rect(src,
                              CDEF_BSTRIDE,
                              colbuf[pli],
                              CDEF_HBORDER,
                              rend + CDEF_VBORDER,
                              CDEF_HBORDER);
                }

                /* Saving pixels in case we need to dering the superblock on the
                   right. */
                if (fbc < nhfb - 1)
                    copy_rect(colbuf[pli],
                              CDEF_HBORDER,
                              src + hsize,
                              CDEF_BSTRIDE,
                              rend + CDEF_VBORDER,
                              CDEF_HBORDER);

                /* Save the last rows of this block, before they are filtered,
                   for the filter-block row below. */
                if (fbr < nvfb - 1)
                    copy_sb8_16(&linebuf[pli][coffset],
                                stride,
                                rec_buff,
                                (MI_SIZE_64X64 << mi_high_l2[pli]) * (fbr + 1) - CDEF_VBORDER,
                                coffset,
                                rec_stride,
                                CDEF_VBORDER,
                                hsize);

                /* Borders that fall outside the frame are marked unavailable,
                   overriding whatever was copied above. */
                if (frame_top) {
                    fill_rect(
                        src, CDEF_BSTRIDE, CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
                }
                if (frame_left) {
                    fill_rect(
                        src, CDEF_BSTRIDE, vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
                }
                if (frame_bottom) {
                    fill_rect(&src[(vsize + CDEF_VBORDER) * CDEF_BSTRIDE],
                              CDEF_BSTRIDE,
                              CDEF_VBORDER,
                              hsize + 2 * CDEF_HBORDER,
                              CDEF_VERY_LARGE);
                }
                if (frame_right) {
                    fill_rect(&src[hsize + CDEF_HBORDER],
                              CDEF_BSTRIDE,
                              vsize + 2 * CDEF_VBORDER,
                              CDEF_HBORDER,
                              CDEF_VERY_LARGE);
                }

                /* Filter the listed 8x8 blocks; the output goes straight into
                   the reconstruction buffer at this filter block's position. */
                svt_cdef_filter_fb(
                    &rec_buff[rec_stride * (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
                              (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
                    NULL,
                    rec_stride,
                    &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER],
                    xdec[pli],
                    ydec[pli],
                    dir,
                    NULL,
                    var,
                    pli,
                    dlist,
                    cdef_count,
                    level,
                    sec_strength,
                    pri_damping,
                    sec_damping,
                    coeff_shift);
            }
            /* Filtered data was written back to the reconstruction, so the
               next block must take its left border from colbuf. */
            cdef_left = 1;
        }
        {
            /* The row just processed becomes the "previous" row. */
            uint8_t *tmp  = prev_row_cdef;
            prev_row_cdef = curr_row_cdef;
            curr_row_cdef = tmp;
        }
    }

cleanup:
    svt_aom_free(row_cdef);
    for (int32_t pli = 0; pli < num_planes; pli++) {
        /* Guarded: after a failed allocation some entries are still NULL and
           svt_aom_free's handling of NULL is not visible from here. */
        if (linebuf[pli] != NULL)
            svt_aom_free(linebuf[pli]);
        if (colbuf[pli] != NULL)
            svt_aom_free(colbuf[pli]);
    }
}
662 
av1_cdef_frame16bit(EncDecContext * context_ptr,SequenceControlSet * scs_ptr,PictureControlSet * pCs)663 void av1_cdef_frame16bit(EncDecContext *context_ptr, SequenceControlSet *scs_ptr,
664                          PictureControlSet *pCs) {
665     (void)context_ptr;
666     struct PictureParentControlSet *ppcs    = pCs->parent_pcs_ptr;
667     Av1Common *                     cm      = ppcs->av1_cm;
668     FrameHeader *                   frm_hdr = &ppcs->frm_hdr;
669 
670     EbPictureBufferDesc *recon_picture_ptr;
671 
672     if (ppcs->is_used_as_reference_flag == EB_TRUE)
673         recon_picture_ptr =
674             ((EbReferenceObject *)pCs->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)
675                 ->reference_picture16bit;
676 
677     else
678         recon_picture_ptr = pCs->parent_pcs_ptr->enc_dec_ptr->recon_picture16bit_ptr;
679 
680     uint16_t *recon_buffer_y = (uint16_t *)recon_picture_ptr->buffer_y +
681         (recon_picture_ptr->origin_x + recon_picture_ptr->origin_y * recon_picture_ptr->stride_y);
682     uint16_t *recon_buffer_cb = (uint16_t *)recon_picture_ptr->buffer_cb +
683         (recon_picture_ptr->origin_x / 2 +
684          recon_picture_ptr->origin_y / 2 * recon_picture_ptr->stride_cb);
685     uint16_t *recon_buffer_cr = (uint16_t *)recon_picture_ptr->buffer_cr +
686         (recon_picture_ptr->origin_x / 2 +
687          recon_picture_ptr->origin_y / 2 * recon_picture_ptr->stride_cr);
688 
689     const int32_t num_planes = av1_num_planes(&scs_ptr->seq_header.color_config);
690     DECLARE_ALIGNED(16, uint16_t, src[CDEF_INBUF_SIZE]);
691     uint16_t *linebuf[3];
692     uint16_t *colbuf[3];
693     CdefList  dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
694     uint8_t * row_cdef, *prev_row_cdef, *curr_row_cdef;
695     int32_t   cdef_count;
696     int32_t   dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = {{0}};
697     int32_t   var[CDEF_NBLOCKS][CDEF_NBLOCKS] = {{0}};
698     int32_t   mi_wide_l2[3];
699     int32_t   mi_high_l2[3];
700     int32_t   xdec[3];
701     int32_t   ydec[3];
702     int32_t coeff_shift = AOMMAX(scs_ptr->static_config.encoder_bit_depth /*cm->bit_depth*/ - 8, 0);
703     const int32_t nvfb  = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
704     const int32_t nhfb  = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
705     row_cdef            = (uint8_t *)svt_aom_malloc(sizeof(*row_cdef) * (nhfb + 2) * 2);
706     assert(row_cdef);
707     memset(row_cdef, 1, sizeof(*row_cdef) * (nhfb + 2) * 2);
708     prev_row_cdef = row_cdef + 1;
709     curr_row_cdef = prev_row_cdef + nhfb + 2;
710     for (int32_t pli = 0; pli < num_planes; pli++) {
711         int32_t subsampling_x = (pli == 0) ? 0 : 1;
712         int32_t subsampling_y = (pli == 0) ? 0 : 1;
713 
714         xdec[pli]       = subsampling_x; //CHKN xd->plane[pli].subsampling_x;
715         ydec[pli]       = subsampling_y; //CHKN  xd->plane[pli].subsampling_y;
716         mi_wide_l2[pli] = MI_SIZE_LOG2 - subsampling_x; //CHKN xd->plane[pli].subsampling_x;
717         mi_high_l2[pli] = MI_SIZE_LOG2 - subsampling_y; //CHKN xd->plane[pli].subsampling_y;
718     }
719 
720     const int32_t stride = (cm->mi_cols << MI_SIZE_LOG2) + 2 * CDEF_HBORDER;
721     for (int32_t pli = 0; pli < num_planes; pli++) {
722         linebuf[pli] = (uint16_t *)svt_aom_malloc(sizeof(*linebuf) * CDEF_VBORDER * stride);
723         colbuf[pli]  = (uint16_t *)svt_aom_malloc(
724             sizeof(*colbuf) * ((CDEF_BLOCKSIZE << mi_high_l2[pli]) + 2 * CDEF_VBORDER) *
725             CDEF_HBORDER);
726     }
727 
728     for (int32_t fbr = 0; fbr < nvfb; fbr++) {
729         for (int32_t pli = 0; pli < num_planes; pli++) {
730             const int32_t block_height = (MI_SIZE_64X64 << mi_high_l2[pli]) + 2 * CDEF_VBORDER;
731             fill_rect(colbuf[pli], CDEF_HBORDER, block_height, CDEF_HBORDER, CDEF_VERY_LARGE);
732         }
733 
734         int32_t cdef_left = 1;
735         for (int32_t fbc = 0; fbc < nhfb; fbc++) {
736             int32_t level, sec_strength;
737             int32_t uv_level, uv_sec_strength;
738             int32_t nhb, nvb;
739             int32_t cstart     = 0;
740             curr_row_cdef[fbc] = 0;
741 
742             //WAHT IS THIS  ?? CHKN -->for
743             if (pCs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc] ==
744                     NULL ||
745                 pCs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]
746                         ->mbmi.cdef_strength == -1) {
747                 cdef_left = 0;
748                 SVT_LOG("\n\n\nCDEF ERROR: Skipping Current FB\n\n\n");
749                 continue;
750             }
751 
752             if (!cdef_left)
753                 cstart =
754                     -CDEF_HBORDER; //CHKN if the left block has not been filtered, then we can use samples on the left as input.
755 
756             nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
757             nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
758             int32_t frame_top, frame_left, frame_bottom, frame_right;
759 
760             int32_t mi_row = MI_SIZE_64X64 * fbr;
761             int32_t mi_col = MI_SIZE_64X64 * fbc;
762             // for the current filter block, it's top left corner mi structure (mi_tl)
763             // is first accessed to check whether the top and left boundaries are
764             // frame boundaries. Then bottom-left and top-right mi structures are
765             // accessed to check whether the bottom and right boundaries
766             // (respectively) are frame boundaries.
767             //
768             // Note that we can't just check the bottom-right mi structure - eg. if
769             // we're at the right-hand edge of the frame but not the bottom, then
770             // the bottom-right mi is NULL but the bottom-left is not.
771             frame_top  = (mi_row == 0) ? 1 : 0;
772             frame_left = (mi_col == 0) ? 1 : 0;
773 
774             if (fbr != nvfb - 1)
775                 frame_bottom = (mi_row + MI_SIZE_64X64 == cm->mi_rows) ? 1 : 0;
776             else
777                 frame_bottom = 1;
778 
779             if (fbc != nhfb - 1)
780                 frame_right = (mi_col + MI_SIZE_64X64 == cm->mi_cols) ? 1 : 0;
781             else
782                 frame_right = 1;
783 
784             const int32_t mbmi_cdef_strength =
785                 pCs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]
786                     ->mbmi.cdef_strength;
787             level = frm_hdr->cdef_params.cdef_y_strength[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
788             sec_strength = frm_hdr->cdef_params.cdef_y_strength[mbmi_cdef_strength] %
789                 CDEF_SEC_STRENGTHS;
790             sec_strength += sec_strength == 3;
791             uv_level = frm_hdr->cdef_params.cdef_uv_strength[mbmi_cdef_strength] /
792                 CDEF_SEC_STRENGTHS;
793             uv_sec_strength = frm_hdr->cdef_params.cdef_uv_strength[mbmi_cdef_strength] %
794                 CDEF_SEC_STRENGTHS;
795             uv_sec_strength += uv_sec_strength == 3;
796             if ((level == 0 && sec_strength == 0 && uv_level == 0 && uv_sec_strength == 0) ||
797                 (cdef_count = svt_sb_compute_cdef_list(
798                      pCs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, BLOCK_64X64)) == 0) {
799                 cdef_left = 0;
800                 continue;
801             }
802 
803             curr_row_cdef[fbc] = 1;
804             for (int32_t pli = 0; pli < num_planes; pli++) {
805                 int32_t coffset;
806                 int32_t rend, cend;
807                 int32_t pri_damping = frm_hdr->cdef_params.cdef_damping;
808                 int32_t sec_damping = pri_damping;
809                 int32_t hsize       = nhb << mi_wide_l2[pli];
810                 int32_t vsize       = nvb << mi_high_l2[pli];
811 
812                 if (pli) {
813                     level        = uv_level;
814                     sec_strength = uv_sec_strength;
815                 }
816 
817                 if (fbc == nhfb - 1)
818                     cend = hsize;
819                 else
820                     cend = hsize + CDEF_HBORDER;
821 
822                 if (fbr == nvfb - 1)
823                     rend = vsize;
824                 else
825                     rend = vsize + CDEF_VBORDER;
826 
827                 coffset = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
828                 if (fbc == nhfb - 1) {
829                     /* On the last superblock column, fill in the right border with
830                     CDEF_VERY_LARGE to avoid filtering with the outside. */
831                     fill_rect(&src[cend + CDEF_HBORDER],
832                               CDEF_BSTRIDE,
833                               rend + CDEF_VBORDER,
834                               hsize + CDEF_HBORDER - cend,
835                               CDEF_VERY_LARGE);
836                 }
837                 if (fbr == nvfb - 1) {
838                     /* On the last superblock row, fill in the bottom border with
839                     CDEF_VERY_LARGE to avoid filtering with the outside. */
840                     fill_rect(&src[(rend + CDEF_VBORDER) * CDEF_BSTRIDE],
841                               CDEF_BSTRIDE,
842                               CDEF_VBORDER,
843                               hsize + 2 * CDEF_HBORDER,
844                               CDEF_VERY_LARGE);
845                 }
846 
847                 uint16_t *rec_buff   = 0;
848                 uint32_t  rec_stride = 0;
849 
850                 switch (pli) {
851                 case 0:
852                     rec_buff   = recon_buffer_y;
853                     rec_stride = recon_picture_ptr->stride_y;
854                     break;
855                 case 1:
856                     rec_buff   = recon_buffer_cb;
857                     rec_stride = recon_picture_ptr->stride_cb;
858 
859                     break;
860                 case 2:
861                     rec_buff   = recon_buffer_cr;
862                     rec_stride = recon_picture_ptr->stride_cr;
863                     break;
864                 }
865 
866                 /* Copy in the pixels we need from the current superblock for
867                 deringing.*/
868 
869                 copy_sb16_16(&src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER + cstart],
870                              CDEF_BSTRIDE,
871                              rec_buff,
872                              (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr,
873                              coffset + cstart,
874                              rec_stride,
875                              rend,
876                              cend - cstart);
877 
878                 if (!prev_row_cdef[fbc]) {
879                     copy_sb16_16(&src[CDEF_HBORDER],
880                                  CDEF_BSTRIDE,
881                                  rec_buff,
882                                  (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
883                                  coffset,
884                                  rec_stride,
885                                  CDEF_VBORDER,
886                                  hsize);
887                 } else if (fbr > 0) {
888                     copy_rect(&src[CDEF_HBORDER],
889                               CDEF_BSTRIDE,
890                               &linebuf[pli][coffset],
891                               stride,
892                               CDEF_VBORDER,
893                               hsize);
894                 } else {
895                     fill_rect(
896                         &src[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hsize, CDEF_VERY_LARGE);
897                 }
898 
899                 if (!prev_row_cdef[fbc - 1]) {
900                     copy_sb16_16(src,
901                                  CDEF_BSTRIDE,
902                                  rec_buff,
903                                  (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
904                                  coffset - CDEF_HBORDER,
905                                  rec_stride,
906                                  CDEF_VBORDER,
907                                  CDEF_HBORDER);
908                 } else if (fbr > 0 && fbc > 0) {
909                     copy_rect(src,
910                               CDEF_BSTRIDE,
911                               &linebuf[pli][coffset - CDEF_HBORDER],
912                               stride,
913                               CDEF_VBORDER,
914                               CDEF_HBORDER);
915                 } else {
916                     fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
917                 }
918 
919                 if (!prev_row_cdef[fbc + 1]) {
920                     copy_sb16_16(&src[CDEF_HBORDER + (nhb << mi_wide_l2[pli])],
921                                  CDEF_BSTRIDE,
922                                  rec_buff,
923                                  (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
924                                  coffset + hsize,
925                                  rec_stride,
926                                  CDEF_VBORDER,
927                                  CDEF_HBORDER);
928                 } else if (fbr > 0 && fbc < nhfb - 1) {
929                     copy_rect(&src[hsize + CDEF_HBORDER],
930                               CDEF_BSTRIDE,
931                               &linebuf[pli][coffset + hsize],
932                               stride,
933                               CDEF_VBORDER,
934                               CDEF_HBORDER);
935                 } else {
936                     fill_rect(&src[hsize + CDEF_HBORDER],
937                               CDEF_BSTRIDE,
938                               CDEF_VBORDER,
939                               CDEF_HBORDER,
940                               CDEF_VERY_LARGE);
941                 }
942 
943                 if (cdef_left) {
944                     /* If we deringed the superblock on the left then we need to copy in
945                     saved pixels. */
946                     copy_rect(src,
947                               CDEF_BSTRIDE,
948                               colbuf[pli],
949                               CDEF_HBORDER,
950                               rend + CDEF_VBORDER,
951                               CDEF_HBORDER);
952                 }
953 
954                 /* Saving pixels in case we need to dering the superblock on the
955                 right. */
956                 if (fbc < nhfb - 1)
957                     copy_rect(colbuf[pli],
958                               CDEF_HBORDER,
959                               src + hsize,
960                               CDEF_BSTRIDE,
961                               rend + CDEF_VBORDER,
962                               CDEF_HBORDER);
963                 if (fbr < nvfb - 1)
964                     copy_sb16_16(&linebuf[pli][coffset],
965                                  stride,
966                                  rec_buff,
967                                  (MI_SIZE_64X64 << mi_high_l2[pli]) * (fbr + 1) - CDEF_VBORDER,
968                                  coffset,
969                                  rec_stride,
970                                  CDEF_VBORDER,
971                                  hsize);
972                 if (frame_top) {
973                     fill_rect(
974                         src, CDEF_BSTRIDE, CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
975                 }
976                 if (frame_left) {
977                     fill_rect(
978                         src, CDEF_BSTRIDE, vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
979                 }
980                 if (frame_bottom) {
981                     fill_rect(&src[(vsize + CDEF_VBORDER) * CDEF_BSTRIDE],
982                               CDEF_BSTRIDE,
983                               CDEF_VBORDER,
984                               hsize + 2 * CDEF_HBORDER,
985                               CDEF_VERY_LARGE);
986                 }
987                 if (frame_right) {
988                     fill_rect(&src[hsize + CDEF_HBORDER],
989                               CDEF_BSTRIDE,
990                               vsize + 2 * CDEF_VBORDER,
991                               CDEF_HBORDER,
992                               CDEF_VERY_LARGE);
993                 }
994 
995                 svt_cdef_filter_fb(NULL,
996                                    &rec_buff[rec_stride * (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
997                                              (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
998                                    rec_stride,
999                                    &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER],
1000                                    xdec[pli],
1001                                    ydec[pli],
1002                                    dir,
1003                                    NULL,
1004                                    var,
1005                                    pli,
1006                                    dlist,
1007                                    cdef_count,
1008                                    level,
1009                                    sec_strength,
1010                                    pri_damping,
1011                                    sec_damping,
1012                                    coeff_shift);
1013             }
1014             cdef_left = 1; //CHKN filtered data is written back directy to recFrame.
1015         }
1016         {
1017             uint8_t *tmp  = prev_row_cdef;
1018             prev_row_cdef = curr_row_cdef;
1019             curr_row_cdef = tmp;
1020         }
1021     }
1022     svt_aom_free(row_cdef);
1023     for (int32_t pli = 0; pli < num_planes; pli++) {
1024         svt_aom_free(linebuf[pli]);
1025         svt_aom_free(colbuf[pli]);
1026     }
1027 }
1028 
// ------- CDEF strength search -------
1030 
/*
 * Picks the single strength that, added to the nb_strengths entries already
 * stored in lev[0 .. nb_strengths-1], yields the lowest summed error.
 *
 * For each of the sb_count rows of `mse`, the error of a candidate j is the
 * smaller of mse[row][j] and the best error reachable with the strengths
 * already selected. Candidates are taken from [start_gi, end_gi).
 *
 * The winning index is written to lev[nb_strengths] (0 if the candidate range
 * is empty) and its summed error is returned ((uint64_t)1 << 63 if the range
 * is empty). Ties go to the lowest index.
 */
static uint64_t search_one(int32_t *lev, int32_t nb_strengths, uint64_t mse[][TOTAL_STRENGTHS],
                           int32_t sb_count, int32_t start_gi, int32_t end_gi) {
    const uint64_t unset                       = (uint64_t)1 << 63;
    uint64_t       cand_total[TOTAL_STRENGTHS] = {0};

    for (int32_t sb = 0; sb < sb_count; sb++) {
        const uint64_t *row = mse[sb];

        /* Lowest error this block can already get from the chosen set. */
        uint64_t cap = unset;
        for (int32_t s = 0; s < nb_strengths; s++) {
            const uint64_t chosen = row[lev[s]];
            cap                   = chosen < cap ? chosen : cap;
        }

        /* A new candidate can only help: never charge more than the cap. */
        for (int32_t cand = start_gi; cand < end_gi; cand++)
            cand_total[cand] += row[cand] < cap ? row[cand] : cap;
    }

    uint64_t lowest = unset;
    int32_t  winner = 0;
    for (int32_t cand = start_gi; cand < end_gi; cand++) {
        if (cand_total[cand] >= lowest)
            continue;
        lowest = cand_total[cand];
        winner = cand;
    }

    lev[nb_strengths] = winner;
    return lowest;
}
1067 
/*
 * Two-plane variant of the single-strength search: picks the (luma, chroma)
 * strength pair that, added to the nb_strengths pairs already stored in
 * lev0/lev1, yields the lowest summed error.
 *
 * mse[0] and mse[1] are the two per-block error tables; the error of a pair
 * (j, k) for a block is mse[0][block][j] + mse[1][block][k], capped by the
 * best error the block already reaches with the selected pairs. Both indices
 * range over [start_gi, end_gi).
 *
 * The winning pair is written to lev0[nb_strengths] / lev1[nb_strengths]
 * (0 / 0 if the range is empty) and its summed error is returned
 * ((uint64_t)1 << 63 if the range is empty). Ties go to the first pair in
 * row-major (j, then k) order.
 */
uint64_t svt_search_one_dual_c(int *lev0, int *lev1, int nb_strengths,
                               uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count, int start_gi,
                               int end_gi) {
    const uint64_t unset = (uint64_t)1 << 63;
    uint64_t       pair_total[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
    memset(pair_total, 0, sizeof(pair_total));

    for (int sb = 0; sb < sb_count; sb++) {
        const uint64_t *first  = mse[0][sb];
        const uint64_t *second = mse[1][sb];

        /* Lowest combined error this block already gets from the chosen pairs. */
        uint64_t cap = unset;
        for (int s = 0; s < nb_strengths; s++) {
            const uint64_t joint = first[lev0[s]] + second[lev1[s]];
            cap                  = joint < cap ? joint : cap;
        }

        /* Charge every candidate pair, never more than the cap. */
        for (int a = start_gi; a < end_gi; a++) {
            for (int b = start_gi; b < end_gi; b++) {
                const uint64_t joint = first[a] + second[b];
                pair_total[a][b] += joint < cap ? joint : cap;
            }
        }
    }

    uint64_t lowest  = unset;
    int      winner0 = 0;
    int      winner1 = 0;
    for (int a = start_gi; a < end_gi; a++) {
        for (int b = start_gi; b < end_gi; b++) {
            if (pair_total[a][b] >= lowest)
                continue;
            lowest  = pair_total[a][b];
            winner0 = a;
            winner1 = b;
        }
    }

    lev0[nb_strengths] = winner0;
    lev1[nb_strengths] = winner1;
    return lowest;
}
1118 
1119 /* Search for the set of strengths that minimizes mse. */
joint_strength_search(int32_t * best_lev,int32_t nb_strengths,uint64_t mse[][TOTAL_STRENGTHS],int32_t sb_count,int32_t start_gi,int32_t end_gi)1120 static uint64_t joint_strength_search(int32_t *best_lev, int32_t nb_strengths,
1121                                       uint64_t mse[][TOTAL_STRENGTHS], int32_t sb_count,
1122                                       int32_t start_gi, int32_t end_gi) {
1123     uint64_t best_tot_mse;
1124     int32_t  i;
1125     best_tot_mse = (uint64_t)1 << 63;
1126     /* Greedy search: add one strength options at a time. */
1127     for (i = 0; i < nb_strengths; i++)
1128         best_tot_mse = search_one(best_lev, i, mse, sb_count, start_gi, end_gi);
1129     /* Trying to refine the greedy search by reconsidering each
1130     already-selected option. */
1131     for (i = 0; i < 4 * nb_strengths; i++) {
1132         int32_t j;
1133         for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
1134         best_tot_mse = search_one(best_lev, nb_strengths - 1, mse, sb_count, start_gi, end_gi);
1135     }
1136     return best_tot_mse;
1137 }
1138 
1139 /* Search for the set of luma+chroma strengths that minimizes mse. */
joint_strength_search_dual(int32_t * best_lev0,int32_t * best_lev1,int32_t nb_strengths,uint64_t (** mse)[TOTAL_STRENGTHS],int32_t sb_count,int32_t start_gi,int32_t end_gi)1140 static uint64_t joint_strength_search_dual(int32_t *best_lev0, int32_t *best_lev1,
1141                                            int32_t nb_strengths, uint64_t (**mse)[TOTAL_STRENGTHS],
1142                                            int32_t sb_count, int32_t start_gi, int32_t end_gi) {
1143     uint64_t best_tot_mse;
1144     int32_t  i;
1145     best_tot_mse = (uint64_t)1 << 63;
1146     /* Greedy search: add one strength options at a time. */
1147     for (i = 0; i < nb_strengths; i++)
1148         best_tot_mse = svt_search_one_dual(
1149             best_lev0, best_lev1, i, mse, sb_count, start_gi, end_gi);
1150     /* Trying to refine the greedy search by reconsidering each
1151     already-selected option. */
1152     for (i = 0; i < 4 * nb_strengths; i++) {
1153         int32_t j;
1154         for (j = 0; j < nb_strengths - 1; j++) {
1155             best_lev0[j] = best_lev0[j + 1];
1156             best_lev1[j] = best_lev1[j + 1];
1157         }
1158         best_tot_mse = svt_search_one_dual(
1159             best_lev0, best_lev1, nb_strengths - 1, mse, sb_count, start_gi, end_gi);
1160     }
1161     return best_tot_mse;
1162 }
1163 
/*
 * Calls get_cdef_filter_strengths() for (pick_method, strength_idx) and stores
 * pri_strength * CDEF_SEC_STRENGTHS + sec_strength into cdef_strength.
 *
 * Relies on int variables named `pri_strength` and `sec_strength` being
 * declared in the calling scope; both are overwritten.
 *
 * Wrapped in do { } while (0) so the two statements behave as one when the
 * macro is used under an unbraced if/else; invoke it with a trailing ';'.
 */
#define STORE_CDEF_FILTER_STRENGTH(cdef_strength, pick_method, strength_idx)                    \
    do {                                                                                        \
        get_cdef_filter_strengths((pick_method), &pri_strength, &sec_strength, (strength_idx)); \
        (cdef_strength) = pri_strength * CDEF_SEC_STRENGTHS + sec_strength;                     \
    } while (0)
finish_cdef_search(EncDecContext * context_ptr,PictureControlSet * pcs_ptr,int32_t selected_strength_cnt[64])1167 void finish_cdef_search(EncDecContext *context_ptr, PictureControlSet *pcs_ptr,
1168                         int32_t selected_strength_cnt[64]) {
1169     (void)context_ptr;
1170     struct PictureParentControlSet *ppcs    = pcs_ptr->parent_pcs_ptr;
1171     FrameHeader *                   frm_hdr = &ppcs->frm_hdr;
1172     Av1Common *                     cm      = ppcs->av1_cm;
1173     int32_t                         mi_rows = ppcs->av1_cm->mi_rows;
1174     int32_t                         mi_cols = ppcs->av1_cm->mi_cols;
1175 
1176     int32_t fbr, fbc;
1177 
1178     int32_t pli;
1179 
1180     uint64_t         best_tot_mse = (uint64_t)1 << 63;
1181     int32_t          sb_count;
1182     int32_t          nvfb              = (mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
1183     int32_t          nhfb              = (mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
1184     int32_t *        sb_index          = (int32_t *)malloc(nvfb * nhfb * sizeof(*sb_index));
1185     int32_t *        selected_strength = (int32_t *)malloc(nvfb * nhfb * sizeof(*sb_index));
1186     int32_t          start_gi;
1187     int32_t          end_gi;
1188     CDEF_PICK_METHOD pick_method = pcs_ptr->parent_pcs_ptr->cdef_level == 2 ? CDEF_FAST_SEARCH_LVL1
1189         : pcs_ptr->parent_pcs_ptr->cdef_level == 3                          ? CDEF_FAST_SEARCH_LVL2
1190         : pcs_ptr->parent_pcs_ptr->cdef_level > 3 ?  CDEF_FAST_SEARCH_LVL3 : 0;
1191 
1192     const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 && pick_method <= CDEF_FAST_SEARCH_LVL3);
1193     assert(sb_index != NULL);
1194     assert(selected_strength != NULL);
1195 
1196     start_gi = 0;
1197     end_gi   = nb_cdef_strengths[pick_method];
1198 
1199     uint64_t(*mse[2])[TOTAL_STRENGTHS];
1200     int32_t       pri_damping = 3 + (frm_hdr->quantization_params.base_q_idx >> 6);
1201     int32_t       i;
1202     int32_t       nb_strengths;
1203     int32_t       nb_strength_bits;
1204     uint64_t      lambda;
1205     const int32_t num_planes = 3; // av1_num_planes(cm);
1206     uint16_t qp_index = (uint8_t)pcs_ptr->parent_pcs_ptr->frm_hdr.quantization_params.base_q_idx;
1207     uint32_t fast_lambda, full_lambda = 0;
1208     (*av1_lambda_assignment_function_table[pcs_ptr->parent_pcs_ptr->pred_structure])(
1209         pcs_ptr,
1210         &fast_lambda,
1211         &full_lambda,
1212         (uint8_t)pcs_ptr->parent_pcs_ptr->enhanced_picture_ptr->bit_depth,
1213         qp_index,
1214         EB_FALSE);
1215     lambda = full_lambda;
1216 
1217     mse[0] = (uint64_t(*)[64])malloc(sizeof(**mse) * nvfb * nhfb);
1218     mse[1] = (uint64_t(*)[64])malloc(sizeof(**mse) * nvfb * nhfb);
1219 
1220     sb_count = 0;
1221     for (fbr = 0; fbr < nvfb; ++fbr) {
1222         for (fbc = 0; fbc < nhfb; ++fbc) {
1223             ModeInfo **mi = pcs_ptr->mi_grid_base + MI_SIZE_64X64 * fbr * cm->mi_stride +
1224                 MI_SIZE_64X64 * fbc;
1225             const MbModeInfo *mbmi = &mi[0]->mbmi;
1226 
1227             if (((fbc & 1) &&
1228                  (mbmi->block_mi.sb_type == BLOCK_128X128 ||
1229                   mbmi->block_mi.sb_type == BLOCK_128X64)) ||
1230                 ((fbr & 1) &&
1231                  (mbmi->block_mi.sb_type == BLOCK_128X128 ||
1232                   mbmi->block_mi.sb_type == BLOCK_64X128))) {
1233                 continue;
1234             }
1235 
1236             // No filtering if the entire filter block is skipped
1237             if (svt_sb_all_skip(pcs_ptr, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64))
1238                 continue;
1239 
1240             for (pli = 0; pli < num_planes; pli++) {
1241                 if (pli == 0)
1242                     svt_memcpy(mse[0][sb_count],
1243                                pcs_ptr->mse_seg[0][fbr * nhfb + fbc],
1244                                TOTAL_STRENGTHS * sizeof(uint64_t));
1245                 if (pli == 2)
1246                     svt_memcpy(mse[1][sb_count],
1247                                pcs_ptr->mse_seg[1][fbr * nhfb + fbc],
1248                                TOTAL_STRENGTHS * sizeof(uint64_t));
1249                 sb_index[sb_count] = MI_SIZE_64X64 * fbr * pcs_ptr->mi_stride + MI_SIZE_64X64 * fbc;
1250             }
1251             sb_count++;
1252         }
1253     }
1254 
1255     nb_strength_bits = 0;
1256     /* Search for different number of signalling bits. */
1257     for (i = 0; i <= 3; i++) {
1258         int32_t best_lev0[CDEF_MAX_STRENGTHS];
1259         int32_t best_lev1[CDEF_MAX_STRENGTHS] = {0};
1260         nb_strengths                          = 1 << i;
1261         uint64_t tot_mse                      = joint_strength_search_dual(
1262             best_lev0, best_lev1, nb_strengths, mse, sb_count, start_gi, end_gi);
1263         (void)joint_strength_search;
1264         /* Count superblock signalling cost. */
1265         const int      total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS * 2;
1266         const int      rate_cost  = av1_cost_literal(total_bits);
1267         const uint64_t dist       = tot_mse * 16;
1268         tot_mse                   = RDCOST(lambda, rate_cost, dist);
1269         if (tot_mse < best_tot_mse) {
1270             best_tot_mse     = tot_mse;
1271             nb_strength_bits = i;
1272             for (int32_t j = 0; j < 1 << nb_strength_bits; j++) {
1273                 frm_hdr->cdef_params.cdef_y_strength[j]  = best_lev0[j];
1274                 frm_hdr->cdef_params.cdef_uv_strength[j] = best_lev1[j];
1275             }
1276         }
1277     }
1278     nb_strengths = 1 << nb_strength_bits;
1279 
1280     frm_hdr->cdef_params.cdef_bits = nb_strength_bits;
1281     ppcs->nb_cdef_strengths        = nb_strengths;
1282     for (i = 0; i < sb_count; i++) {
1283         int32_t  gi;
1284         int32_t  best_gi;
1285         uint64_t best_mse = (uint64_t)1 << 63;
1286         best_gi           = 0;
1287         for (gi = 0; gi < ppcs->nb_cdef_strengths; gi++) {
1288             uint64_t curr = mse[0][i][frm_hdr->cdef_params.cdef_y_strength[gi]];
1289             curr += mse[1][i][frm_hdr->cdef_params.cdef_uv_strength[gi]];
1290             if (curr < best_mse) {
1291                 best_gi  = gi;
1292                 best_mse = curr;
1293             }
1294         }
1295         selected_strength[i] = best_gi;
1296         selected_strength_cnt[best_gi]++;
1297 
1298         pcs_ptr->mi_grid_base[sb_index[i]]->mbmi.cdef_strength = (int8_t)best_gi;
1299         //in case the fb is within a block=128x128 or 128x64, or 64x128, then we genrate param only for the first 64x64.
1300         //since our mi map deos not have the multi pointer single data assignment, we need to duplicate data.
1301         BlockSize sb_type = pcs_ptr->mi_grid_base[sb_index[i]]->mbmi.block_mi.sb_type;
1302 
1303         switch (sb_type) {
1304         case BLOCK_128X128:
1305             pcs_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->mbmi.cdef_strength = (int8_t)
1306                 best_gi;
1307             pcs_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs_ptr->mi_stride]
1308                 ->mbmi.cdef_strength = (int8_t)best_gi;
1309             pcs_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs_ptr->mi_stride + MI_SIZE_64X64]
1310                 ->mbmi.cdef_strength = (int8_t)best_gi;
1311             break;
1312         case BLOCK_128X64:
1313             pcs_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->mbmi.cdef_strength = (int8_t)
1314                 best_gi;
1315             break;
1316         case BLOCK_64X128:
1317             pcs_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs_ptr->mi_stride]
1318                 ->mbmi.cdef_strength = (int8_t)best_gi;
1319             break;
1320         default: break;
1321         }
1322     }
1323     if (fast) {
1324         for (int j = 0; j < ppcs->nb_cdef_strengths; j++) {
1325             const int luma_strength   = frm_hdr->cdef_params.cdef_y_strength[j];
1326             const int chroma_strength = frm_hdr->cdef_params.cdef_uv_strength[j];
1327             int       pri_strength, sec_strength;
1328             STORE_CDEF_FILTER_STRENGTH(
1329                 frm_hdr->cdef_params.cdef_y_strength[j], pick_method, luma_strength);
1330             STORE_CDEF_FILTER_STRENGTH(
1331                 frm_hdr->cdef_params.cdef_uv_strength[j], pick_method, chroma_strength);
1332         }
1333     }
1334     //cdef_pri_damping & cdef_sec_damping consolidated to cdef_damping
1335     frm_hdr->cdef_params.cdef_damping = pri_damping;
1336     free(mse[0]);
1337     free(mse[1]);
1338     free(sb_index);
1339     free(selected_strength);
1340 }
1341