1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10 */
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <math.h>
14 #include <string.h>
15
16 #include "EbEncCdef.h"
17 #include <stdint.h>
18 #include "aom_dsp_rtcd.h"
19 #include "EbLog.h"
20
21 extern int16_t svt_av1_ac_quant_q3(int32_t qindex, int32_t delta, AomBitDepth bit_depth);
22
23 #include "EbRateDistortionCost.h"
24
dist_8x8_16bit_c(const uint16_t * src,const uint16_t * dst,const int32_t dstride,const int32_t coeff_shift)25 static INLINE uint64_t dist_8x8_16bit_c(const uint16_t *src, const uint16_t *dst,
26 const int32_t dstride, const int32_t coeff_shift) {
27 uint64_t svar = 0;
28 uint64_t dvar = 0;
29 uint64_t sum_s = 0;
30 uint64_t sum_d = 0;
31 uint64_t sum_s2 = 0;
32 uint64_t sum_d2 = 0;
33 uint64_t sum_sd = 0;
34 int32_t i, j;
35 for (i = 0; i < 8; i++) {
36 for (j = 0; j < 8; j++) {
37 sum_s += src[8 * i + j];
38 sum_d += dst[i * dstride + j];
39 sum_s2 += src[8 * i + j] * src[8 * i + j];
40 sum_d2 += dst[i * dstride + j] * dst[i * dstride + j];
41 sum_sd += src[8 * i + j] * dst[i * dstride + j];
42 }
43 }
44 /* Compute the variance -- the calculation cannot go negative. */
45 svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
46 dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);
47 return (uint64_t)floor(.5 +
48 (sum_d2 + sum_s2 - 2 * sum_sd) * .5 *
49 (svar + dvar + (400 << 2 * coeff_shift)) /
50 (sqrt((20000 << 4 * coeff_shift) + svar * (double)dvar)));
51 }
52
mse_8_16bit(const uint16_t * src,const uint16_t * dst,const int32_t dstride,const int32_t height)53 static INLINE uint64_t mse_8_16bit(const uint16_t *src, const uint16_t *dst, const int32_t dstride,
54 const int32_t height) {
55 uint64_t sum = 0;
56 int32_t i, j;
57 for (i = 0; i < height; i++) {
58 for (j = 0; j < 8; j++) {
59 int32_t e = dst[i * dstride + j] - src[8 * i + j];
60 sum += e * e;
61 }
62 }
63 return sum;
64 }
65
mse_4_16bit_c(const uint16_t * src,const uint16_t * dst,const int32_t dstride,const int32_t height)66 static INLINE uint64_t mse_4_16bit_c(const uint16_t *src, const uint16_t *dst,
67 const int32_t dstride, const int32_t height) {
68 uint64_t sum = 0;
69 int32_t i, j;
70 for (i = 0; i < height; i++) {
71 for (j = 0; j < 4; j++) {
72 int32_t e = dst[i * dstride + j] - src[4 * i + j];
73 sum += e * e;
74 }
75 }
76 return sum;
77 }
78
dist_8x8_8bit_c(const uint8_t * src,const uint8_t * dst,const int32_t dstride,const int32_t coeff_shift)79 static INLINE uint64_t dist_8x8_8bit_c(const uint8_t *src, const uint8_t *dst,
80 const int32_t dstride, const int32_t coeff_shift) {
81 uint64_t svar = 0;
82 uint64_t dvar = 0;
83 uint64_t sum_s = 0;
84 uint64_t sum_d = 0;
85 uint64_t sum_s2 = 0;
86 uint64_t sum_d2 = 0;
87 uint64_t sum_sd = 0;
88 int32_t i, j;
89 for (i = 0; i < 8; i++) {
90 for (j = 0; j < 8; j++) {
91 sum_s += src[8 * i + j];
92 sum_d += dst[i * dstride + j];
93 sum_s2 += src[8 * i + j] * src[8 * i + j];
94 sum_d2 += dst[i * dstride + j] * dst[i * dstride + j];
95 sum_sd += src[8 * i + j] * dst[i * dstride + j];
96 }
97 }
98 /* Compute the variance -- the calculation cannot go negative. */
99 svar = sum_s2 - ((sum_s * sum_s + 32) >> 6);
100 dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6);
101 return (uint64_t)floor(.5 +
102 (sum_d2 + sum_s2 - 2 * sum_sd) * .5 *
103 (svar + dvar + (400 << 2 * coeff_shift)) /
104 (sqrt((20000 << 4 * coeff_shift) + svar * (double)dvar)));
105 }
106
mse_8_8bit(const uint8_t * src,const uint8_t * dst,const int32_t dstride,const int32_t height)107 static INLINE uint64_t mse_8_8bit(const uint8_t *src, const uint8_t *dst, const int32_t dstride,
108 const int32_t height) {
109 uint64_t sum = 0;
110 int32_t i, j;
111 for (i = 0; i < height; i++) {
112 for (j = 0; j < 8; j++) {
113 int32_t e = dst[i * dstride + j] - src[8 * i + j];
114 sum += e * e;
115 }
116 }
117 return sum;
118 }
119
mse_4_8bit_c(const uint8_t * src,const uint8_t * dst,const int32_t dstride,const int32_t height)120 static INLINE uint64_t mse_4_8bit_c(const uint8_t *src, const uint8_t *dst, const int32_t dstride,
121 const int32_t height) {
122 uint64_t sum = 0;
123 int32_t i, j;
124 for (i = 0; i < height; i++) {
125 for (j = 0; j < 4; j++) {
126 int32_t e = dst[i * dstride + j] - src[4 * i + j];
127 sum += e * e;
128 }
129 }
130 return sum;
131 }
132
133 /* Compute MSE only on the blocks we filtered. */
/* Compute MSE only on the blocks we filtered (16-bit path).
 * `dlist`/`cdef_count` enumerate the filtered blocks; `src` holds them packed
 * back-to-back, while `dst` is addressed through `dstride` using the block
 * coordinates.  Luma 8x8 blocks (pli == 0) use the activity-weighted
 * distortion; everything else uses plain SSE.  The result is normalized back
 * to 8-bit scale via `coeff_shift`. */
uint64_t compute_cdef_dist_c(const uint16_t *dst, int32_t dstride, const uint16_t *src,
                             const CdefList *dlist, int32_t cdef_count, BlockSize bsize,
                             int32_t coeff_shift, int32_t pli) {
    uint64_t sum = 0;
    switch (bsize) {
    case BLOCK_8X8:
        for (int32_t bi = 0; bi < cdef_count; bi++) {
            const int32_t   by = dlist[bi].by;
            const int32_t   bx = dlist[bi].bx;
            const uint16_t *d  = &dst[(by << 3) * dstride + (bx << 3)];
            if (pli == 0)
                sum += dist_8x8_16bit_c(&src[bi << (3 + 3)], d, dstride, coeff_shift);
            else
                sum += mse_8_16bit(&src[bi << (3 + 3)], d, dstride, 8);
        }
        break;
    case BLOCK_4X8:
        for (int32_t bi = 0; bi < cdef_count; bi++) {
            const int32_t by = dlist[bi].by;
            const int32_t bx = dlist[bi].bx;
            sum += mse_4_16bit_c(
                &src[bi << (3 + 2)], &dst[(by << 3) * dstride + (bx << 2)], dstride, 8);
        }
        break;
    case BLOCK_8X4:
        for (int32_t bi = 0; bi < cdef_count; bi++) {
            const int32_t by = dlist[bi].by;
            const int32_t bx = dlist[bi].bx;
            sum += mse_8_16bit(
                &src[bi << (2 + 3)], &dst[(by << 2) * dstride + (bx << 3)], dstride, 4);
        }
        break;
    default:
        assert(bsize == BLOCK_4X4);
        for (int32_t bi = 0; bi < cdef_count; bi++) {
            const int32_t by = dlist[bi].by;
            const int32_t bx = dlist[bi].bx;
            sum += mse_4_16bit_c(
                &src[bi << (2 + 2)], &dst[(by << 2) * dstride + (bx << 2)], dstride, 4);
        }
        break;
    }
    return sum >> 2 * coeff_shift;
}
177
/* Compute MSE only on the blocks we filtered (8-bit path).
 * Mirrors compute_cdef_dist_c: `dlist`/`cdef_count` enumerate the filtered
 * blocks, `src8` holds them packed back-to-back, `dst8` is addressed through
 * `dstride`.  Luma 8x8 blocks (pli == 0) use the activity-weighted
 * distortion; everything else uses plain SSE. */
uint64_t compute_cdef_dist_8bit_c(const uint8_t *dst8, int32_t dstride, const uint8_t *src8,
                                  const CdefList *dlist, int32_t cdef_count, BlockSize bsize,
                                  int32_t coeff_shift, int32_t pli) {
    uint64_t sum = 0;
    switch (bsize) {
    case BLOCK_8X8:
        for (int32_t bi = 0; bi < cdef_count; bi++) {
            const int32_t  by = dlist[bi].by;
            const int32_t  bx = dlist[bi].bx;
            const uint8_t *d  = &dst8[(by << 3) * dstride + (bx << 3)];
            if (pli == 0)
                sum += dist_8x8_8bit_c(&src8[bi << (3 + 3)], d, dstride, coeff_shift);
            else
                sum += mse_8_8bit(&src8[bi << (3 + 3)], d, dstride, 8);
        }
        break;
    case BLOCK_4X8:
        for (int32_t bi = 0; bi < cdef_count; bi++) {
            const int32_t by = dlist[bi].by;
            const int32_t bx = dlist[bi].bx;
            sum += mse_4_8bit_c(
                &src8[bi << (3 + 2)], &dst8[(by << 3) * dstride + (bx << 2)], dstride, 8);
        }
        break;
    case BLOCK_8X4:
        for (int32_t bi = 0; bi < cdef_count; bi++) {
            const int32_t by = dlist[bi].by;
            const int32_t bx = dlist[bi].bx;
            sum += mse_8_8bit(
                &src8[bi << (2 + 3)], &dst8[(by << 2) * dstride + (bx << 3)], dstride, 4);
        }
        break;
    default:
        assert(bsize == BLOCK_4X4);
        for (int32_t bi = 0; bi < cdef_count; bi++) {
            const int32_t by = dlist[bi].by;
            const int32_t bx = dlist[bi].bx;
            sum += mse_4_8bit_c(
                &src8[bi << (2 + 2)], &dst8[(by << 2) * dstride + (bx << 2)], dstride, 4);
        }
        break;
    }
    return sum >> 2 * coeff_shift;
}
221
/* Return 1 if every mi unit inside the 64x64 superblock at (mi_row, mi_col)
 * is coded as skip, 0 otherwise.  The scan area is clipped to the frame
 * boundary. */
int32_t svt_sb_all_skip(PictureControlSet *pcs_ptr, const Av1Common *const cm, int32_t mi_row,
                        int32_t mi_col) {
    const int32_t maxr = AOMMIN(cm->mi_rows - mi_row, MI_SIZE_64X64);
    const int32_t maxc = AOMMIN(cm->mi_cols - mi_col, MI_SIZE_64X64);

    for (int32_t r = 0; r < maxr; r++) {
        for (int32_t c = 0; c < maxc; c++) {
            /* One non-skip unit decides the answer; stop scanning early. */
            if (!pcs_ptr->mi_grid_base[(mi_row + r) * pcs_ptr->mi_stride + mi_col + c]
                     ->mbmi.block_mi.skip)
                return 0;
        }
    }
    return 1;
}
241
/* Return 1 if all mi units covering the 8x8 block at (mi_row, mi_col) are
 * coded as skip, 0 otherwise. */
static int32_t is_8x8_block_skip(ModeInfo **grid, int32_t mi_row, int32_t mi_col,
                                 int32_t mi_stride) {
    int32_t all_skip = 1;
    for (int32_t r = 0; r < mi_size_high[BLOCK_8X8]; ++r) {
        for (int32_t c = 0; c < mi_size_wide[BLOCK_8X8]; ++c) {
            const ModeInfo *mi = grid[(mi_row + r) * mi_stride + (mi_col + c)];
            all_skip &= (int32_t)mi->mbmi.block_mi.skip;
        }
    }
    return all_skip;
}
251
/* Build the list of non-skip 8x8 blocks inside the superblock at
 * (mi_row, mi_col) and return the number of entries written to `dlist`.
 * Block coordinates are stored in 8x8 units relative to the superblock. */
int32_t svt_sb_compute_cdef_list(PictureControlSet *pcs_ptr, const Av1Common *const cm,
                                 int32_t mi_row, int32_t mi_col, CdefList *dlist, BlockSize bs) {
    //MbModeInfo **grid = cm->mi_grid_visible;
    ModeInfo **mi_grid = pcs_ptr->mi_grid_base;

    /* Clip the scan area to the frame boundary and to the superblock size
     * (128-wide/-high superblocks scan a larger area). */
    const int32_t sb_cols = (bs == BLOCK_128X128 || bs == BLOCK_128X64) ? MI_SIZE_128X128
                                                                        : MI_SIZE_64X64;
    const int32_t sb_rows = (bs == BLOCK_128X128 || bs == BLOCK_64X128) ? MI_SIZE_128X128
                                                                        : MI_SIZE_64X64;
    const int32_t maxc = AOMMIN(cm->mi_cols - mi_col, sb_cols);
    const int32_t maxr = AOMMIN(cm->mi_rows - mi_row, sb_rows);

    /* Step one 8x8 block at a time; with 4x4 mi units that is 2 mi per step,
     * and the shift converts mi offsets into 8x8-block coordinates. */
    const int32_t r_step  = mi_size_high[BLOCK_8X8];
    const int32_t c_step  = mi_size_wide[BLOCK_8X8];
    const int32_t r_shift = (r_step == 2);
    const int32_t c_shift = (c_step == 2);

    assert(r_step == 1 || r_step == 2);
    assert(c_step == 1 || c_step == 2);

    int32_t count = 0;
    for (int32_t r = 0; r < maxr; r += r_step) {
        for (int32_t c = 0; c < maxc; c += c_step) {
            if (is_8x8_block_skip(mi_grid, mi_row + r, mi_col + c, pcs_ptr->mi_stride))
                continue; /* fully-skipped blocks are not filtered */
            dlist[count].by   = (uint8_t)(r >> r_shift);
            dlist[count].bx   = (uint8_t)(c >> c_shift);
            dlist[count].skip = 0;
            count++;
        }
    }
    return count;
}
291
/* Apply the CDEF filter to the whole reconstructed frame (8-bit path).
 * Superblocks are processed in raster order; for each 64x64 filter block the
 * needed pixels (plus CDEF_H/VBORDER borders) are gathered into the 16-bit
 * `src` staging buffer, borders that fall outside the frame or on not-yet /
 * already-filtered neighbors are handled via fill/copy, and the filtered
 * output is written back in place into the recon buffers. */
void svt_av1_cdef_frame(EncDecContext *context_ptr, SequenceControlSet *scs_ptr,
                        PictureControlSet *pCs) {
    (void)context_ptr;

    struct PictureParentControlSet *ppcs    = pCs->parent_pcs_ptr;
    Av1Common *                     cm      = ppcs->av1_cm;
    FrameHeader *                   frm_hdr = &ppcs->frm_hdr;

    EbPictureBufferDesc *recon_picture_ptr;

    /* Filter the reference-picture buffer when this frame is used as a
       reference, otherwise the local recon buffer. */
    if (ppcs->is_used_as_reference_flag == EB_TRUE)
        recon_picture_ptr =
            ((EbReferenceObject *)pCs->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)
                ->reference_picture;
    else
        recon_picture_ptr = pCs->parent_pcs_ptr->enc_dec_ptr->recon_picture_ptr;

    /* Plane base pointers skipping the padding origin (chroma is 4:2:0
       subsampled, hence the /2). */
    EbByte recon_buffer_y = &(
        (recon_picture_ptr->buffer_y)[recon_picture_ptr->origin_x +
                                      recon_picture_ptr->origin_y * recon_picture_ptr->stride_y]);
    EbByte recon_buffer_cb = &((recon_picture_ptr->buffer_cb)[recon_picture_ptr->origin_x / 2 +
                                                              recon_picture_ptr->origin_y / 2 *
                                                                  recon_picture_ptr->stride_cb]);
    EbByte recon_buffer_cr = &((recon_picture_ptr->buffer_cr)[recon_picture_ptr->origin_x / 2 +
                                                              recon_picture_ptr->origin_y / 2 *
                                                                  recon_picture_ptr->stride_cr]);

    const int32_t num_planes = av1_num_planes(&scs_ptr->seq_header.color_config);
    /* Staging buffer for one bordered filter block (16-bit working domain). */
    DECLARE_ALIGNED(16, uint16_t, src[CDEF_INBUF_SIZE]);
    uint16_t *linebuf[3]; /* saved bottom rows of the previous SB row, per plane */
    uint16_t *colbuf[3];  /* saved right columns of the previous SB, per plane */
    CdefList  dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
    uint8_t * row_cdef, *prev_row_cdef, *curr_row_cdef;
    int32_t   cdef_count;
    int32_t   dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = {{0}};
    int32_t   var[CDEF_NBLOCKS][CDEF_NBLOCKS] = {{0}};
    int32_t   mi_wide_l2[3];
    int32_t   mi_high_l2[3];
    int32_t   xdec[3];
    int32_t   ydec[3];
    int32_t coeff_shift = AOMMAX(scs_ptr->static_config.encoder_bit_depth /*cm->bit_depth*/ - 8, 0);
    const int32_t nvfb  = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
    const int32_t nhfb  = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
    //svt_av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0, num_planes);
    /* Two rows of per-filter-block "was filtered" flags with one guard entry
       on each side, so prev_row_cdef[fbc - 1] / [fbc + 1] are always valid. */
    row_cdef = (uint8_t *)svt_aom_malloc(sizeof(*row_cdef) * (nhfb + 2) * 2);
    assert(row_cdef != NULL); /* NOTE(review): only checked in debug builds */
    memset(row_cdef, 1, sizeof(*row_cdef) * (nhfb + 2) * 2);
    prev_row_cdef = row_cdef + 1;
    curr_row_cdef = prev_row_cdef + nhfb + 2;
    for (int32_t pli = 0; pli < num_planes; pli++) {
        int32_t subsampling_x = (pli == 0) ? 0 : 1;
        int32_t subsampling_y = (pli == 0) ? 0 : 1;

        xdec[pli]       = subsampling_x; //CHKN xd->plane[pli].subsampling_x;
        ydec[pli]       = subsampling_y; //CHKN xd->plane[pli].subsampling_y;
        mi_wide_l2[pli] = MI_SIZE_LOG2 - subsampling_x; //CHKN xd->plane[pli].subsampling_x;
        mi_high_l2[pli] = MI_SIZE_LOG2 - subsampling_y; //CHKN xd->plane[pli].subsampling_y;
    }

    const int32_t stride = (cm->mi_cols << MI_SIZE_LOG2) + 2 * CDEF_HBORDER;
    for (int32_t pli = 0; pli < num_planes; pli++) {
        /* NOTE(review): these allocations are not NULL-checked before use. */
        linebuf[pli] = (uint16_t *)svt_aom_malloc(sizeof(*linebuf) * CDEF_VBORDER * stride);
        colbuf[pli]  = (uint16_t *)svt_aom_malloc(
            sizeof(*colbuf) * ((CDEF_BLOCKSIZE << mi_high_l2[pli]) + 2 * CDEF_VBORDER) *
            CDEF_HBORDER);
    }

    for (int32_t fbr = 0; fbr < nvfb; fbr++) {
        /* Reset the saved-column buffers at the start of each SB row. */
        for (int32_t pli = 0; pli < num_planes; pli++) {
            const int32_t block_height = (MI_SIZE_64X64 << mi_high_l2[pli]) + 2 * CDEF_VBORDER;
            fill_rect(colbuf[pli], CDEF_HBORDER, block_height, CDEF_HBORDER, CDEF_VERY_LARGE);
        }

        int32_t cdef_left = 1;
        for (int32_t fbc = 0; fbc < nhfb; fbc++) {
            int32_t level, sec_strength;
            int32_t uv_level, uv_sec_strength;
            int32_t nhb, nvb;
            int32_t cstart   = 0;
            curr_row_cdef[fbc] = 0;

            /* Skip filter blocks whose top-left mi is missing or carries no
               CDEF strength; this should not happen in a valid frame. */
            if (pCs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc] ==
                    NULL ||
                pCs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]
                        ->mbmi.cdef_strength == -1) {
                cdef_left = 0;
                SVT_LOG("\n\n\nCDEF ERROR: Skipping Current FB\n\n\n");
                continue;
            }

            if (!cdef_left)
                cstart =
                    -CDEF_HBORDER; //CHKN if the left block has not been filtered, then we can use samples on the left as input.

            /* Filter-block size in mi units, clipped at the frame edges. */
            nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
            nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
            int32_t frame_top, frame_left, frame_bottom, frame_right;

            int32_t mi_row = MI_SIZE_64X64 * fbr;
            int32_t mi_col = MI_SIZE_64X64 * fbc;
            // for the current filter block, it's top left corner mi structure (mi_tl)
            // is first accessed to check whether the top and left boundaries are
            // frame boundaries. Then bottom-left and top-right mi structures are
            // accessed to check whether the bottom and right boundaries
            // (respectively) are frame boundaries.
            //
            // Note that we can't just check the bottom-right mi structure - eg. if
            // we're at the right-hand edge of the frame but not the bottom, then
            // the bottom-right mi is NULL but the bottom-left is not.
            frame_top  = (mi_row == 0) ? 1 : 0;
            frame_left = (mi_col == 0) ? 1 : 0;

            if (fbr != nvfb - 1)
                frame_bottom = (mi_row + MI_SIZE_64X64 == cm->mi_rows) ? 1 : 0;
            else
                frame_bottom = 1;

            if (fbc != nhfb - 1)
                frame_right = (mi_col + MI_SIZE_64X64 == cm->mi_cols) ? 1 : 0;
            else
                frame_right = 1;

            /* Look up the signaled strength pair for this filter block.
               Strengths are packed as primary * CDEF_SEC_STRENGTHS + secondary;
               secondary strength 3 is transmitted for actual strength 4. */
            const int32_t mbmi_cdef_strength =
                pCs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]
                    ->mbmi.cdef_strength;
            level = frm_hdr->cdef_params.cdef_y_strength[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
            sec_strength = frm_hdr->cdef_params.cdef_y_strength[mbmi_cdef_strength] %
                CDEF_SEC_STRENGTHS;
            sec_strength += sec_strength == 3;
            uv_level = frm_hdr->cdef_params.cdef_uv_strength[mbmi_cdef_strength] /
                CDEF_SEC_STRENGTHS;
            uv_sec_strength = frm_hdr->cdef_params.cdef_uv_strength[mbmi_cdef_strength] %
                CDEF_SEC_STRENGTHS;
            uv_sec_strength += uv_sec_strength == 3;
            /* Nothing to do when all strengths are zero or every 8x8 block in
               the filter block is skipped. */
            if ((level == 0 && sec_strength == 0 && uv_level == 0 && uv_sec_strength == 0) ||
                (cdef_count = svt_sb_compute_cdef_list(
                     pCs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, BLOCK_64X64)) == 0) {
                cdef_left = 0;
                continue;
            }

            curr_row_cdef[fbc] = 1;
            for (int32_t pli = 0; pli < num_planes; pli++) {
                int32_t coffset;
                int32_t rend, cend;
                int32_t pri_damping = frm_hdr->cdef_params.cdef_damping;
                int32_t sec_damping = pri_damping;
                int32_t hsize       = nhb << mi_wide_l2[pli];
                int32_t vsize       = nvb << mi_high_l2[pli];

                if (pli) {
                    level        = uv_level;
                    sec_strength = uv_sec_strength;
                }

                /* Extent of valid pixels to copy, including the right/bottom
                   border except at the frame edge. */
                if (fbc == nhfb - 1)
                    cend = hsize;
                else
                    cend = hsize + CDEF_HBORDER;

                if (fbr == nvfb - 1)
                    rend = vsize;
                else
                    rend = vsize + CDEF_VBORDER;

                coffset = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
                if (fbc == nhfb - 1) {
                    /* On the last superblock column, fill in the right border with
                       CDEF_VERY_LARGE to avoid filtering with the outside. */
                    fill_rect(&src[cend + CDEF_HBORDER],
                              CDEF_BSTRIDE,
                              rend + CDEF_VBORDER,
                              hsize + CDEF_HBORDER - cend,
                              CDEF_VERY_LARGE);
                }
                if (fbr == nvfb - 1) {
                    /* On the last superblock row, fill in the bottom border with
                       CDEF_VERY_LARGE to avoid filtering with the outside. */
                    fill_rect(&src[(rend + CDEF_VBORDER) * CDEF_BSTRIDE],
                              CDEF_BSTRIDE,
                              CDEF_VBORDER,
                              hsize + 2 * CDEF_HBORDER,
                              CDEF_VERY_LARGE);
                }

                /* Select the recon plane being filtered. */
                uint8_t *rec_buff   = 0;
                uint32_t rec_stride = 0;

                switch (pli) {
                case 0:
                    rec_buff   = recon_buffer_y;
                    rec_stride = recon_picture_ptr->stride_y;
                    break;
                case 1:
                    rec_buff   = recon_buffer_cb;
                    rec_stride = recon_picture_ptr->stride_cb;

                    break;
                case 2:
                    rec_buff   = recon_buffer_cr;
                    rec_stride = recon_picture_ptr->stride_cr;
                    break;
                }

                /* Copy in the pixels we need from the current superblock for
                   deringing.*/
                copy_sb8_16(&src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER + cstart],
                            CDEF_BSTRIDE,
                            rec_buff,
                            (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr,
                            coffset + cstart,
                            rec_stride,
                            rend,
                            cend - cstart);
                /* Top border: from the recon when the block above was not
                   filtered, from the saved line buffer when it was, or filled
                   at the frame top. */
                if (!prev_row_cdef[fbc]) {
                    copy_sb8_16(&src[CDEF_HBORDER],
                                CDEF_BSTRIDE,
                                rec_buff,
                                (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
                                coffset,
                                rec_stride,
                                CDEF_VBORDER,
                                hsize);
                } else if (fbr > 0) {
                    copy_rect(&src[CDEF_HBORDER],
                              CDEF_BSTRIDE,
                              &linebuf[pli][coffset],
                              stride,
                              CDEF_VBORDER,
                              hsize);
                } else {
                    fill_rect(
                        &src[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hsize, CDEF_VERY_LARGE);
                }

                /* Top-left corner: same three-way choice as the top border. */
                if (!prev_row_cdef[fbc - 1]) {
                    copy_sb8_16(src,
                                CDEF_BSTRIDE,
                                rec_buff,
                                (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
                                coffset - CDEF_HBORDER,
                                rec_stride,
                                CDEF_VBORDER,
                                CDEF_HBORDER);
                } else if (fbr > 0 && fbc > 0) {
                    copy_rect(src,
                              CDEF_BSTRIDE,
                              &linebuf[pli][coffset - CDEF_HBORDER],
                              stride,
                              CDEF_VBORDER,
                              CDEF_HBORDER);
                } else {
                    fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
                }

                /* Top-right corner: same three-way choice. */
                if (!prev_row_cdef[fbc + 1]) {
                    copy_sb8_16(&src[CDEF_HBORDER + (nhb << mi_wide_l2[pli])],
                                CDEF_BSTRIDE,
                                rec_buff,
                                (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
                                coffset + hsize,
                                rec_stride,
                                CDEF_VBORDER,
                                CDEF_HBORDER);
                } else if (fbr > 0 && fbc < nhfb - 1) {
                    copy_rect(&src[hsize + CDEF_HBORDER],
                              CDEF_BSTRIDE,
                              &linebuf[pli][coffset + hsize],
                              stride,
                              CDEF_VBORDER,
                              CDEF_HBORDER);
                } else {
                    fill_rect(&src[hsize + CDEF_HBORDER],
                              CDEF_BSTRIDE,
                              CDEF_VBORDER,
                              CDEF_HBORDER,
                              CDEF_VERY_LARGE);
                }

                if (cdef_left) {
                    /* If we deringed the superblock on the left then we need to copy in
                       saved pixels. */
                    copy_rect(src,
                              CDEF_BSTRIDE,
                              colbuf[pli],
                              CDEF_HBORDER,
                              rend + CDEF_VBORDER,
                              CDEF_HBORDER);
                }

                /* Saving pixels in case we need to dering the superblock on the
                   right. */
                if (fbc < nhfb - 1)
                    copy_rect(colbuf[pli],
                              CDEF_HBORDER,
                              src + hsize,
                              CDEF_BSTRIDE,
                              rend + CDEF_VBORDER,
                              CDEF_HBORDER);

                /* Save the bottom rows for the SB row below (they are about to
                   be overwritten by the in-place filtering). */
                if (fbr < nvfb - 1)
                    copy_sb8_16(&linebuf[pli][coffset],
                                stride,
                                rec_buff,
                                (MI_SIZE_64X64 << mi_high_l2[pli]) * (fbr + 1) - CDEF_VBORDER,
                                coffset,
                                rec_stride,
                                CDEF_VBORDER,
                                hsize);

                /* Overwrite borders that fall outside the frame so the filter
                   never reads real pixels from beyond the frame boundary. */
                if (frame_top) {
                    fill_rect(
                        src, CDEF_BSTRIDE, CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
                }
                if (frame_left) {
                    fill_rect(
                        src, CDEF_BSTRIDE, vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
                }
                if (frame_bottom) {
                    fill_rect(&src[(vsize + CDEF_VBORDER) * CDEF_BSTRIDE],
                              CDEF_BSTRIDE,
                              CDEF_VBORDER,
                              hsize + 2 * CDEF_HBORDER,
                              CDEF_VERY_LARGE);
                }
                if (frame_right) {
                    fill_rect(&src[hsize + CDEF_HBORDER],
                              CDEF_BSTRIDE,
                              vsize + 2 * CDEF_VBORDER,
                              CDEF_HBORDER,
                              CDEF_VERY_LARGE);
                }

                {
                    /* Filter the listed 8x8 blocks, writing 8-bit output
                       directly back into the recon plane. */
                    svt_cdef_filter_fb(
                        &rec_buff[rec_stride * (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
                                  (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
                        NULL,
                        rec_stride,
                        &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER],
                        xdec[pli],
                        ydec[pli],
                        dir,
                        NULL,
                        var,
                        pli,
                        dlist,
                        cdef_count,
                        level,
                        sec_strength,
                        pri_damping,
                        sec_damping,
                        coeff_shift);
                }
            }
            cdef_left = 1; //CHKN filtered data is written back directy to recFrame.
        }
        {
            /* Swap the per-row filtered flags for the next SB row. */
            uint8_t *tmp  = prev_row_cdef;
            prev_row_cdef = curr_row_cdef;
            curr_row_cdef = tmp;
        }
    }
    svt_aom_free(row_cdef);
    for (int32_t pli = 0; pli < num_planes; pli++) {
        svt_aom_free(linebuf[pli]);
        svt_aom_free(colbuf[pli]);
    }
}
662
av1_cdef_frame16bit(EncDecContext * context_ptr,SequenceControlSet * scs_ptr,PictureControlSet * pCs)663 void av1_cdef_frame16bit(EncDecContext *context_ptr, SequenceControlSet *scs_ptr,
664 PictureControlSet *pCs) {
665 (void)context_ptr;
666 struct PictureParentControlSet *ppcs = pCs->parent_pcs_ptr;
667 Av1Common * cm = ppcs->av1_cm;
668 FrameHeader * frm_hdr = &ppcs->frm_hdr;
669
670 EbPictureBufferDesc *recon_picture_ptr;
671
672 if (ppcs->is_used_as_reference_flag == EB_TRUE)
673 recon_picture_ptr =
674 ((EbReferenceObject *)pCs->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr)
675 ->reference_picture16bit;
676
677 else
678 recon_picture_ptr = pCs->parent_pcs_ptr->enc_dec_ptr->recon_picture16bit_ptr;
679
680 uint16_t *recon_buffer_y = (uint16_t *)recon_picture_ptr->buffer_y +
681 (recon_picture_ptr->origin_x + recon_picture_ptr->origin_y * recon_picture_ptr->stride_y);
682 uint16_t *recon_buffer_cb = (uint16_t *)recon_picture_ptr->buffer_cb +
683 (recon_picture_ptr->origin_x / 2 +
684 recon_picture_ptr->origin_y / 2 * recon_picture_ptr->stride_cb);
685 uint16_t *recon_buffer_cr = (uint16_t *)recon_picture_ptr->buffer_cr +
686 (recon_picture_ptr->origin_x / 2 +
687 recon_picture_ptr->origin_y / 2 * recon_picture_ptr->stride_cr);
688
689 const int32_t num_planes = av1_num_planes(&scs_ptr->seq_header.color_config);
690 DECLARE_ALIGNED(16, uint16_t, src[CDEF_INBUF_SIZE]);
691 uint16_t *linebuf[3];
692 uint16_t *colbuf[3];
693 CdefList dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
694 uint8_t * row_cdef, *prev_row_cdef, *curr_row_cdef;
695 int32_t cdef_count;
696 int32_t dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = {{0}};
697 int32_t var[CDEF_NBLOCKS][CDEF_NBLOCKS] = {{0}};
698 int32_t mi_wide_l2[3];
699 int32_t mi_high_l2[3];
700 int32_t xdec[3];
701 int32_t ydec[3];
702 int32_t coeff_shift = AOMMAX(scs_ptr->static_config.encoder_bit_depth /*cm->bit_depth*/ - 8, 0);
703 const int32_t nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
704 const int32_t nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
705 row_cdef = (uint8_t *)svt_aom_malloc(sizeof(*row_cdef) * (nhfb + 2) * 2);
706 assert(row_cdef);
707 memset(row_cdef, 1, sizeof(*row_cdef) * (nhfb + 2) * 2);
708 prev_row_cdef = row_cdef + 1;
709 curr_row_cdef = prev_row_cdef + nhfb + 2;
710 for (int32_t pli = 0; pli < num_planes; pli++) {
711 int32_t subsampling_x = (pli == 0) ? 0 : 1;
712 int32_t subsampling_y = (pli == 0) ? 0 : 1;
713
714 xdec[pli] = subsampling_x; //CHKN xd->plane[pli].subsampling_x;
715 ydec[pli] = subsampling_y; //CHKN xd->plane[pli].subsampling_y;
716 mi_wide_l2[pli] = MI_SIZE_LOG2 - subsampling_x; //CHKN xd->plane[pli].subsampling_x;
717 mi_high_l2[pli] = MI_SIZE_LOG2 - subsampling_y; //CHKN xd->plane[pli].subsampling_y;
718 }
719
720 const int32_t stride = (cm->mi_cols << MI_SIZE_LOG2) + 2 * CDEF_HBORDER;
721 for (int32_t pli = 0; pli < num_planes; pli++) {
722 linebuf[pli] = (uint16_t *)svt_aom_malloc(sizeof(*linebuf) * CDEF_VBORDER * stride);
723 colbuf[pli] = (uint16_t *)svt_aom_malloc(
724 sizeof(*colbuf) * ((CDEF_BLOCKSIZE << mi_high_l2[pli]) + 2 * CDEF_VBORDER) *
725 CDEF_HBORDER);
726 }
727
728 for (int32_t fbr = 0; fbr < nvfb; fbr++) {
729 for (int32_t pli = 0; pli < num_planes; pli++) {
730 const int32_t block_height = (MI_SIZE_64X64 << mi_high_l2[pli]) + 2 * CDEF_VBORDER;
731 fill_rect(colbuf[pli], CDEF_HBORDER, block_height, CDEF_HBORDER, CDEF_VERY_LARGE);
732 }
733
734 int32_t cdef_left = 1;
735 for (int32_t fbc = 0; fbc < nhfb; fbc++) {
736 int32_t level, sec_strength;
737 int32_t uv_level, uv_sec_strength;
738 int32_t nhb, nvb;
739 int32_t cstart = 0;
740 curr_row_cdef[fbc] = 0;
741
742 //WAHT IS THIS ?? CHKN -->for
743 if (pCs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc] ==
744 NULL ||
745 pCs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]
746 ->mbmi.cdef_strength == -1) {
747 cdef_left = 0;
748 SVT_LOG("\n\n\nCDEF ERROR: Skipping Current FB\n\n\n");
749 continue;
750 }
751
752 if (!cdef_left)
753 cstart =
754 -CDEF_HBORDER; //CHKN if the left block has not been filtered, then we can use samples on the left as input.
755
756 nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
757 nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);
758 int32_t frame_top, frame_left, frame_bottom, frame_right;
759
760 int32_t mi_row = MI_SIZE_64X64 * fbr;
761 int32_t mi_col = MI_SIZE_64X64 * fbc;
762 // for the current filter block, it's top left corner mi structure (mi_tl)
763 // is first accessed to check whether the top and left boundaries are
764 // frame boundaries. Then bottom-left and top-right mi structures are
765 // accessed to check whether the bottom and right boundaries
766 // (respectively) are frame boundaries.
767 //
768 // Note that we can't just check the bottom-right mi structure - eg. if
769 // we're at the right-hand edge of the frame but not the bottom, then
770 // the bottom-right mi is NULL but the bottom-left is not.
771 frame_top = (mi_row == 0) ? 1 : 0;
772 frame_left = (mi_col == 0) ? 1 : 0;
773
774 if (fbr != nvfb - 1)
775 frame_bottom = (mi_row + MI_SIZE_64X64 == cm->mi_rows) ? 1 : 0;
776 else
777 frame_bottom = 1;
778
779 if (fbc != nhfb - 1)
780 frame_right = (mi_col + MI_SIZE_64X64 == cm->mi_cols) ? 1 : 0;
781 else
782 frame_right = 1;
783
784 const int32_t mbmi_cdef_strength =
785 pCs->mi_grid_base[MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc]
786 ->mbmi.cdef_strength;
787 level = frm_hdr->cdef_params.cdef_y_strength[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
788 sec_strength = frm_hdr->cdef_params.cdef_y_strength[mbmi_cdef_strength] %
789 CDEF_SEC_STRENGTHS;
790 sec_strength += sec_strength == 3;
791 uv_level = frm_hdr->cdef_params.cdef_uv_strength[mbmi_cdef_strength] /
792 CDEF_SEC_STRENGTHS;
793 uv_sec_strength = frm_hdr->cdef_params.cdef_uv_strength[mbmi_cdef_strength] %
794 CDEF_SEC_STRENGTHS;
795 uv_sec_strength += uv_sec_strength == 3;
796 if ((level == 0 && sec_strength == 0 && uv_level == 0 && uv_sec_strength == 0) ||
797 (cdef_count = svt_sb_compute_cdef_list(
798 pCs, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, BLOCK_64X64)) == 0) {
799 cdef_left = 0;
800 continue;
801 }
802
803 curr_row_cdef[fbc] = 1;
804 for (int32_t pli = 0; pli < num_planes; pli++) {
805 int32_t coffset;
806 int32_t rend, cend;
807 int32_t pri_damping = frm_hdr->cdef_params.cdef_damping;
808 int32_t sec_damping = pri_damping;
809 int32_t hsize = nhb << mi_wide_l2[pli];
810 int32_t vsize = nvb << mi_high_l2[pli];
811
812 if (pli) {
813 level = uv_level;
814 sec_strength = uv_sec_strength;
815 }
816
817 if (fbc == nhfb - 1)
818 cend = hsize;
819 else
820 cend = hsize + CDEF_HBORDER;
821
822 if (fbr == nvfb - 1)
823 rend = vsize;
824 else
825 rend = vsize + CDEF_VBORDER;
826
827 coffset = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
828 if (fbc == nhfb - 1) {
829 /* On the last superblock column, fill in the right border with
830 CDEF_VERY_LARGE to avoid filtering with the outside. */
831 fill_rect(&src[cend + CDEF_HBORDER],
832 CDEF_BSTRIDE,
833 rend + CDEF_VBORDER,
834 hsize + CDEF_HBORDER - cend,
835 CDEF_VERY_LARGE);
836 }
837 if (fbr == nvfb - 1) {
838 /* On the last superblock row, fill in the bottom border with
839 CDEF_VERY_LARGE to avoid filtering with the outside. */
840 fill_rect(&src[(rend + CDEF_VBORDER) * CDEF_BSTRIDE],
841 CDEF_BSTRIDE,
842 CDEF_VBORDER,
843 hsize + 2 * CDEF_HBORDER,
844 CDEF_VERY_LARGE);
845 }
846
847 uint16_t *rec_buff = 0;
848 uint32_t rec_stride = 0;
849
850 switch (pli) {
851 case 0:
852 rec_buff = recon_buffer_y;
853 rec_stride = recon_picture_ptr->stride_y;
854 break;
855 case 1:
856 rec_buff = recon_buffer_cb;
857 rec_stride = recon_picture_ptr->stride_cb;
858
859 break;
860 case 2:
861 rec_buff = recon_buffer_cr;
862 rec_stride = recon_picture_ptr->stride_cr;
863 break;
864 }
865
866 /* Copy in the pixels we need from the current superblock for
867 deringing.*/
868
869 copy_sb16_16(&src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER + cstart],
870 CDEF_BSTRIDE,
871 rec_buff,
872 (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr,
873 coffset + cstart,
874 rec_stride,
875 rend,
876 cend - cstart);
877
878 if (!prev_row_cdef[fbc]) {
879 copy_sb16_16(&src[CDEF_HBORDER],
880 CDEF_BSTRIDE,
881 rec_buff,
882 (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
883 coffset,
884 rec_stride,
885 CDEF_VBORDER,
886 hsize);
887 } else if (fbr > 0) {
888 copy_rect(&src[CDEF_HBORDER],
889 CDEF_BSTRIDE,
890 &linebuf[pli][coffset],
891 stride,
892 CDEF_VBORDER,
893 hsize);
894 } else {
895 fill_rect(
896 &src[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hsize, CDEF_VERY_LARGE);
897 }
898
899 if (!prev_row_cdef[fbc - 1]) {
900 copy_sb16_16(src,
901 CDEF_BSTRIDE,
902 rec_buff,
903 (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
904 coffset - CDEF_HBORDER,
905 rec_stride,
906 CDEF_VBORDER,
907 CDEF_HBORDER);
908 } else if (fbr > 0 && fbc > 0) {
909 copy_rect(src,
910 CDEF_BSTRIDE,
911 &linebuf[pli][coffset - CDEF_HBORDER],
912 stride,
913 CDEF_VBORDER,
914 CDEF_HBORDER);
915 } else {
916 fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
917 }
918
919 if (!prev_row_cdef[fbc + 1]) {
920 copy_sb16_16(&src[CDEF_HBORDER + (nhb << mi_wide_l2[pli])],
921 CDEF_BSTRIDE,
922 rec_buff,
923 (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr - CDEF_VBORDER,
924 coffset + hsize,
925 rec_stride,
926 CDEF_VBORDER,
927 CDEF_HBORDER);
928 } else if (fbr > 0 && fbc < nhfb - 1) {
929 copy_rect(&src[hsize + CDEF_HBORDER],
930 CDEF_BSTRIDE,
931 &linebuf[pli][coffset + hsize],
932 stride,
933 CDEF_VBORDER,
934 CDEF_HBORDER);
935 } else {
936 fill_rect(&src[hsize + CDEF_HBORDER],
937 CDEF_BSTRIDE,
938 CDEF_VBORDER,
939 CDEF_HBORDER,
940 CDEF_VERY_LARGE);
941 }
942
943 if (cdef_left) {
944 /* If we deringed the superblock on the left then we need to copy in
945 saved pixels. */
946 copy_rect(src,
947 CDEF_BSTRIDE,
948 colbuf[pli],
949 CDEF_HBORDER,
950 rend + CDEF_VBORDER,
951 CDEF_HBORDER);
952 }
953
954 /* Saving pixels in case we need to dering the superblock on the
955 right. */
956 if (fbc < nhfb - 1)
957 copy_rect(colbuf[pli],
958 CDEF_HBORDER,
959 src + hsize,
960 CDEF_BSTRIDE,
961 rend + CDEF_VBORDER,
962 CDEF_HBORDER);
963 if (fbr < nvfb - 1)
964 copy_sb16_16(&linebuf[pli][coffset],
965 stride,
966 rec_buff,
967 (MI_SIZE_64X64 << mi_high_l2[pli]) * (fbr + 1) - CDEF_VBORDER,
968 coffset,
969 rec_stride,
970 CDEF_VBORDER,
971 hsize);
972 if (frame_top) {
973 fill_rect(
974 src, CDEF_BSTRIDE, CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
975 }
976 if (frame_left) {
977 fill_rect(
978 src, CDEF_BSTRIDE, vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
979 }
980 if (frame_bottom) {
981 fill_rect(&src[(vsize + CDEF_VBORDER) * CDEF_BSTRIDE],
982 CDEF_BSTRIDE,
983 CDEF_VBORDER,
984 hsize + 2 * CDEF_HBORDER,
985 CDEF_VERY_LARGE);
986 }
987 if (frame_right) {
988 fill_rect(&src[hsize + CDEF_HBORDER],
989 CDEF_BSTRIDE,
990 vsize + 2 * CDEF_VBORDER,
991 CDEF_HBORDER,
992 CDEF_VERY_LARGE);
993 }
994
995 svt_cdef_filter_fb(NULL,
996 &rec_buff[rec_stride * (MI_SIZE_64X64 * fbr << mi_high_l2[pli]) +
997 (fbc * MI_SIZE_64X64 << mi_wide_l2[pli])],
998 rec_stride,
999 &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER],
1000 xdec[pli],
1001 ydec[pli],
1002 dir,
1003 NULL,
1004 var,
1005 pli,
1006 dlist,
1007 cdef_count,
1008 level,
1009 sec_strength,
1010 pri_damping,
1011 sec_damping,
1012 coeff_shift);
1013 }
1014 cdef_left = 1; //CHKN filtered data is written back directy to recFrame.
1015 }
1016 {
1017 uint8_t *tmp = prev_row_cdef;
1018 prev_row_cdef = curr_row_cdef;
1019 curr_row_cdef = tmp;
1020 }
1021 }
1022 svt_aom_free(row_cdef);
1023 for (int32_t pli = 0; pli < num_planes; pli++) {
1024 svt_aom_free(linebuf[pli]);
1025 svt_aom_free(colbuf[pli]);
1026 }
1027 }
1028
/* ------------------- CDEF strength search ------------------- */
1030
1031 /* Search for the best strength to add as an option, knowing we
1032 already selected nb_strengths options. */
search_one(int32_t * lev,int32_t nb_strengths,uint64_t mse[][TOTAL_STRENGTHS],int32_t sb_count,int32_t start_gi,int32_t end_gi)1033 static uint64_t search_one(int32_t *lev, int32_t nb_strengths, uint64_t mse[][TOTAL_STRENGTHS],
1034 int32_t sb_count, int32_t start_gi, int32_t end_gi) {
1035 uint64_t tot_mse[TOTAL_STRENGTHS];
1036 const int32_t total_strengths = end_gi;
1037 int32_t i, j;
1038 uint64_t best_tot_mse = (uint64_t)1 << 63;
1039 int32_t best_id = 0;
1040 memset(tot_mse, 0, sizeof(tot_mse));
1041 for (i = 0; i < sb_count; i++) {
1042 int32_t gi;
1043 uint64_t best_mse = (uint64_t)1 << 63;
1044 /* Find best mse among already selected options. */
1045 for (gi = 0; gi < nb_strengths; gi++) {
1046 if (mse[i][lev[gi]] < best_mse)
1047 best_mse = mse[i][lev[gi]];
1048 }
1049 /* Find best mse when adding each possible new option. */
1050
1051 for (j = start_gi; j < total_strengths; j++) {
1052 uint64_t best = best_mse;
1053 if (mse[i][j] < best)
1054 best = mse[i][j];
1055 tot_mse[j] += best;
1056 }
1057 }
1058 for (j = start_gi; j < total_strengths; j++) {
1059 if (tot_mse[j] < best_tot_mse) {
1060 best_tot_mse = tot_mse[j];
1061 best_id = j;
1062 }
1063 }
1064 lev[nb_strengths] = best_id;
1065 return best_tot_mse;
1066 }
1067
1068 /* Search for the best luma+chroma strength to add as an option, knowing we
1069 already selected nb_strengths options. */
svt_search_one_dual_c(int * lev0,int * lev1,int nb_strengths,uint64_t (** mse)[TOTAL_STRENGTHS],int sb_count,int start_gi,int end_gi)1070 uint64_t svt_search_one_dual_c(int *lev0, int *lev1, int nb_strengths,
1071 uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count, int start_gi,
1072 int end_gi) {
1073 uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
1074 int32_t i, j;
1075 uint64_t best_tot_mse = (uint64_t)1 << 63;
1076 int32_t best_id0 = 0;
1077 int32_t best_id1 = 0;
1078 const int32_t total_strengths = end_gi;
1079 memset(tot_mse, 0, sizeof(tot_mse));
1080 for (i = 0; i < sb_count; i++) {
1081 int32_t gi;
1082 uint64_t best_mse = (uint64_t)1 << 63;
1083 /* Find best mse among already selected options. */
1084 for (gi = 0; gi < nb_strengths; gi++) {
1085 uint64_t curr = mse[0][i][lev0[gi]];
1086 curr += mse[1][i][lev1[gi]];
1087 if (curr < best_mse)
1088 best_mse = curr;
1089 }
1090 /* Find best mse when adding each possible new option. */
1091 for (j = start_gi; j < total_strengths; j++) {
1092 int32_t k;
1093 for (k = start_gi; k < total_strengths; k++) {
1094 uint64_t best = best_mse;
1095 uint64_t curr = mse[0][i][j];
1096 curr += mse[1][i][k];
1097 if (curr < best)
1098 best = curr;
1099 tot_mse[j][k] += best;
1100 }
1101 }
1102 }
1103
1104 for (j = start_gi; j < total_strengths; j++) {
1105 int32_t k;
1106 for (k = start_gi; k < total_strengths; k++) {
1107 if (tot_mse[j][k] < best_tot_mse) {
1108 best_tot_mse = tot_mse[j][k];
1109 best_id0 = j;
1110 best_id1 = k;
1111 }
1112 }
1113 }
1114 lev0[nb_strengths] = best_id0;
1115 lev1[nb_strengths] = best_id1;
1116 return best_tot_mse;
1117 }
1118
1119 /* Search for the set of strengths that minimizes mse. */
/* Search for the set of strengths that minimizes mse. */
static uint64_t joint_strength_search(int32_t *best_lev, int32_t nb_strengths,
                                      uint64_t mse[][TOTAL_STRENGTHS], int32_t sb_count,
                                      int32_t start_gi, int32_t end_gi) {
    uint64_t best_tot_mse = (uint64_t)1 << 63;
    /* Greedy phase: add one strength option at a time. */
    for (int32_t i = 0; i < nb_strengths; i++)
        best_tot_mse = search_one(best_lev, i, mse, sb_count, start_gi, end_gi);
    /* Refinement phase: rotate the selected options so that each one in turn
       occupies the last slot, then re-search that slot. */
    for (int32_t i = 0; i < 4 * nb_strengths; i++) {
        for (int32_t j = 0; j < nb_strengths - 1; j++)
            best_lev[j] = best_lev[j + 1];
        best_tot_mse = search_one(best_lev, nb_strengths - 1, mse, sb_count, start_gi, end_gi);
    }
    return best_tot_mse;
}
1138
1139 /* Search for the set of luma+chroma strengths that minimizes mse. */
/* Search for the set of luma+chroma strength pairs that minimizes mse. */
static uint64_t joint_strength_search_dual(int32_t *best_lev0, int32_t *best_lev1,
                                           int32_t nb_strengths, uint64_t (**mse)[TOTAL_STRENGTHS],
                                           int32_t sb_count, int32_t start_gi, int32_t end_gi) {
    uint64_t best_tot_mse = (uint64_t)1 << 63;
    /* Greedy phase: add one luma+chroma pair at a time. */
    for (int32_t i = 0; i < nb_strengths; i++)
        best_tot_mse = svt_search_one_dual(
            best_lev0, best_lev1, i, mse, sb_count, start_gi, end_gi);
    /* Refinement phase: rotate the selected pairs so each in turn becomes the
       last slot, then re-search that slot. */
    for (int32_t i = 0; i < 4 * nb_strengths; i++) {
        for (int32_t j = 0; j < nb_strengths - 1; j++) {
            best_lev0[j] = best_lev0[j + 1];
            best_lev1[j] = best_lev1[j + 1];
        }
        best_tot_mse = svt_search_one_dual(
            best_lev0, best_lev1, nb_strengths - 1, mse, sb_count, start_gi, end_gi);
    }
    return best_tot_mse;
}
1163
/* Remap a pruned (fast-search) strength index to its real primary/secondary
   strengths and re-encode the pair as a frame-header strength value.
   Relies on pri_strength / sec_strength being declared in the enclosing scope.
   Wrapped in do/while(0) so the macro expands to exactly one statement and is
   safe inside unbraced if/else bodies (CERT PRE10-C); the lvalue argument is
   parenthesized against operator-precedence surprises. */
#define STORE_CDEF_FILTER_STRENGTH(cdef_strength, pick_method, strength_idx)                    \
    do {                                                                                        \
        get_cdef_filter_strengths((pick_method), &pri_strength, &sec_strength, (strength_idx)); \
        (cdef_strength) = pri_strength * CDEF_SEC_STRENGTHS + sec_strength;                     \
    } while (0)
/* Finalize the CDEF search for a frame.
   Using the per-superblock, per-plane strength MSEs precomputed into
   pcs_ptr->mse_seg, jointly selects: the number of CDEF signalling bits,
   the frame-level luma/chroma strength sets (written to frm_hdr->cdef_params),
   and the per-64x64 strength index (written into each MbModeInfo).
   selected_strength_cnt[] is incremented per chosen index for caller stats. */
void finish_cdef_search(EncDecContext *context_ptr, PictureControlSet *pcs_ptr,
                        int32_t selected_strength_cnt[64]) {
    (void)context_ptr;
    struct PictureParentControlSet *ppcs = pcs_ptr->parent_pcs_ptr;
    FrameHeader * frm_hdr = &ppcs->frm_hdr;
    Av1Common * cm = ppcs->av1_cm;
    int32_t mi_rows = ppcs->av1_cm->mi_rows;
    int32_t mi_cols = ppcs->av1_cm->mi_cols;

    int32_t fbr, fbc;

    int32_t pli;

    uint64_t best_tot_mse = (uint64_t)1 << 63;
    int32_t sb_count;
    /* Number of 64x64 filter blocks vertically / horizontally (rounded up). */
    int32_t nvfb = (mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
    int32_t nhfb = (mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
    int32_t * sb_index = (int32_t *)malloc(nvfb * nhfb * sizeof(*sb_index));
    /* NOTE(review): sizeof(*sb_index) is used below instead of
       sizeof(*selected_strength); both are int32_t so the size is correct. */
    int32_t * selected_strength = (int32_t *)malloc(nvfb * nhfb * sizeof(*sb_index));
    int32_t start_gi;
    int32_t end_gi;
    /* Map the encoder's cdef_level to a search-pruning method: higher levels
       search a smaller subset of the strength candidates. */
    CDEF_PICK_METHOD pick_method = pcs_ptr->parent_pcs_ptr->cdef_level == 2 ? CDEF_FAST_SEARCH_LVL1
        : pcs_ptr->parent_pcs_ptr->cdef_level == 3 ? CDEF_FAST_SEARCH_LVL2
        : pcs_ptr->parent_pcs_ptr->cdef_level > 3 ? CDEF_FAST_SEARCH_LVL3 : 0;

    /* Fast (pruned) search uses remapped indices that must be converted back
       to real strengths at the end (see the STORE_CDEF_FILTER_STRENGTH use). */
    const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 && pick_method <= CDEF_FAST_SEARCH_LVL3);
    /* NOTE(review): allocation failures are only caught by assert(), which is
       compiled out in release builds — confirm this is the intended policy. */
    assert(sb_index != NULL);
    assert(selected_strength != NULL);

    start_gi = 0;
    end_gi = nb_cdef_strengths[pick_method];

    /* Per-plane MSE tables: mse[0] = luma, mse[1] = chroma; one row per kept SB. */
    uint64_t(*mse[2])[TOTAL_STRENGTHS];
    /* CDEF damping is derived from the frame base qindex. */
    int32_t pri_damping = 3 + (frm_hdr->quantization_params.base_q_idx >> 6);
    int32_t i;
    int32_t nb_strengths;
    int32_t nb_strength_bits;
    uint64_t lambda;
    const int32_t num_planes = 3; // av1_num_planes(cm);
    uint16_t qp_index = (uint8_t)pcs_ptr->parent_pcs_ptr->frm_hdr.quantization_params.base_q_idx;
    uint32_t fast_lambda, full_lambda = 0;
    (*av1_lambda_assignment_function_table[pcs_ptr->parent_pcs_ptr->pred_structure])(
        pcs_ptr,
        &fast_lambda,
        &full_lambda,
        (uint8_t)pcs_ptr->parent_pcs_ptr->enhanced_picture_ptr->bit_depth,
        qp_index,
        EB_FALSE);
    lambda = full_lambda;

    /* NOTE(review): the casts hard-code 64 — assumes TOTAL_STRENGTHS == 64;
       the allocation size itself uses sizeof(**mse) and is always correct. */
    mse[0] = (uint64_t(*)[64])malloc(sizeof(**mse) * nvfb * nhfb);
    mse[1] = (uint64_t(*)[64])malloc(sizeof(**mse) * nvfb * nhfb);

    sb_count = 0;
    /* Gather per-SB MSEs from the segmented search, skipping 64x64 units that
       are covered by a 128-wide/tall block (they share the first unit's
       parameters) and filter blocks that are entirely skipped. */
    for (fbr = 0; fbr < nvfb; ++fbr) {
        for (fbc = 0; fbc < nhfb; ++fbc) {
            ModeInfo **mi = pcs_ptr->mi_grid_base + MI_SIZE_64X64 * fbr * cm->mi_stride +
                MI_SIZE_64X64 * fbc;
            const MbModeInfo *mbmi = &mi[0]->mbmi;

            /* Odd-column / odd-row 64x64s inside a 128-block carry no own CDEF
               parameters; the first 64x64 of the block decides for all. */
            if (((fbc & 1) &&
                 (mbmi->block_mi.sb_type == BLOCK_128X128 ||
                  mbmi->block_mi.sb_type == BLOCK_128X64)) ||
                ((fbr & 1) &&
                 (mbmi->block_mi.sb_type == BLOCK_128X128 ||
                  mbmi->block_mi.sb_type == BLOCK_64X128))) {
                continue;
            }

            // No filtering if the entire filter block is skipped
            if (svt_sb_all_skip(pcs_ptr, cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64))
                continue;

            /* Copy the precomputed MSE rows for luma (seg 0) and chroma (seg 1);
               pli == 1 intentionally copies nothing here. */
            for (pli = 0; pli < num_planes; pli++) {
                if (pli == 0)
                    svt_memcpy(mse[0][sb_count],
                               pcs_ptr->mse_seg[0][fbr * nhfb + fbc],
                               TOTAL_STRENGTHS * sizeof(uint64_t));
                if (pli == 2)
                    svt_memcpy(mse[1][sb_count],
                               pcs_ptr->mse_seg[1][fbr * nhfb + fbc],
                               TOTAL_STRENGTHS * sizeof(uint64_t));
                /* Record the mi-grid offset of this SB (same value each pli). */
                sb_index[sb_count] = MI_SIZE_64X64 * fbr * pcs_ptr->mi_stride + MI_SIZE_64X64 * fbc;
            }
            sb_count++;
        }
    }

    nb_strength_bits = 0;
    /* Search for different number of signalling bits. */
    for (i = 0; i <= 3; i++) {
        int32_t best_lev0[CDEF_MAX_STRENGTHS];
        int32_t best_lev1[CDEF_MAX_STRENGTHS] = {0};
        nb_strengths = 1 << i;
        uint64_t tot_mse = joint_strength_search_dual(
            best_lev0, best_lev1, nb_strengths, mse, sb_count, start_gi, end_gi);
        (void)joint_strength_search;
        /* Count superblock signalling cost. */
        /* i bits per SB to pick its strength, plus the cost of signalling each
           of the nb_strengths luma+chroma strength values in the header. */
        const int total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS * 2;
        const int rate_cost = av1_cost_literal(total_bits);
        const uint64_t dist = tot_mse * 16;
        /* RD-compare this bit count against the best so far; on improvement,
           commit the strength sets to the frame header. */
        tot_mse = RDCOST(lambda, rate_cost, dist);
        if (tot_mse < best_tot_mse) {
            best_tot_mse = tot_mse;
            nb_strength_bits = i;
            for (int32_t j = 0; j < 1 << nb_strength_bits; j++) {
                frm_hdr->cdef_params.cdef_y_strength[j] = best_lev0[j];
                frm_hdr->cdef_params.cdef_uv_strength[j] = best_lev1[j];
            }
        }
    }
    nb_strengths = 1 << nb_strength_bits;

    frm_hdr->cdef_params.cdef_bits = nb_strength_bits;
    ppcs->nb_cdef_strengths = nb_strengths;
    /* Per SB: pick the strength index (gi) from the selected set that
       minimizes the combined luma+chroma MSE, and store it in the mi grid. */
    for (i = 0; i < sb_count; i++) {
        int32_t gi;
        int32_t best_gi;
        uint64_t best_mse = (uint64_t)1 << 63;
        best_gi = 0;
        for (gi = 0; gi < ppcs->nb_cdef_strengths; gi++) {
            uint64_t curr = mse[0][i][frm_hdr->cdef_params.cdef_y_strength[gi]];
            curr += mse[1][i][frm_hdr->cdef_params.cdef_uv_strength[gi]];
            if (curr < best_mse) {
                best_gi = gi;
                best_mse = curr;
            }
        }
        selected_strength[i] = best_gi;
        selected_strength_cnt[best_gi]++;

        pcs_ptr->mi_grid_base[sb_index[i]]->mbmi.cdef_strength = (int8_t)best_gi;
        //in case the fb is within a block=128x128 or 128x64, or 64x128, then we genrate param only for the first 64x64.
        //since our mi map deos not have the multi pointer single data assignment, we need to duplicate data.
        BlockSize sb_type = pcs_ptr->mi_grid_base[sb_index[i]]->mbmi.block_mi.sb_type;

        /* Duplicate the chosen strength into the other 64x64 units covered by
           a 128-wide/tall block (right, below, below-right as applicable). */
        switch (sb_type) {
        case BLOCK_128X128:
            pcs_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->mbmi.cdef_strength = (int8_t)
                best_gi;
            pcs_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs_ptr->mi_stride]
                ->mbmi.cdef_strength = (int8_t)best_gi;
            pcs_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs_ptr->mi_stride + MI_SIZE_64X64]
                ->mbmi.cdef_strength = (int8_t)best_gi;
            break;
        case BLOCK_128X64:
            pcs_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64]->mbmi.cdef_strength = (int8_t)
                best_gi;
            break;
        case BLOCK_64X128:
            pcs_ptr->mi_grid_base[sb_index[i] + MI_SIZE_64X64 * pcs_ptr->mi_stride]
                ->mbmi.cdef_strength = (int8_t)best_gi;
            break;
        default: break;
        }
    }
    /* Fast search worked on a pruned index space: translate each pruned index
       back to its real primary/secondary strength encoding for the header. */
    if (fast) {
        for (int j = 0; j < ppcs->nb_cdef_strengths; j++) {
            const int luma_strength = frm_hdr->cdef_params.cdef_y_strength[j];
            const int chroma_strength = frm_hdr->cdef_params.cdef_uv_strength[j];
            int pri_strength, sec_strength;
            STORE_CDEF_FILTER_STRENGTH(
                frm_hdr->cdef_params.cdef_y_strength[j], pick_method, luma_strength);
            STORE_CDEF_FILTER_STRENGTH(
                frm_hdr->cdef_params.cdef_uv_strength[j], pick_method, chroma_strength);
        }
    }
    //cdef_pri_damping & cdef_sec_damping consolidated to cdef_damping
    frm_hdr->cdef_params.cdef_damping = pri_damping;
    free(mse[0]);
    free(mse[1]);
    free(sb_index);
    free(selected_strength);
}
1341