1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <math.h>
13 #include <string.h>
14
15 #include "config/aom_dsp_rtcd.h"
16 #include "config/aom_scale_rtcd.h"
17
18 #include "aom/aom_integer.h"
19 #include "aom_ports/system_state.h"
20 #include "av1/common/av1_common_int.h"
21 #include "av1/common/reconinter.h"
22 #include "av1/encoder/encoder.h"
23 #include "av1/encoder/ethread.h"
24 #include "av1/encoder/pickcdef.h"
25
26 // Get primary and secondary filter strength for the given strength index and
27 // search method
get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method,int * pri_strength,int * sec_strength,int strength_idx)28 static INLINE void get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method,
29 int *pri_strength,
30 int *sec_strength,
31 int strength_idx) {
32 const int tot_sec_filter = (pick_method >= CDEF_FAST_SEARCH_LVL3)
33 ? REDUCED_SEC_STRENGTHS_LVL3
34 : CDEF_SEC_STRENGTHS;
35 const int pri_idx = strength_idx / tot_sec_filter;
36 const int sec_idx = strength_idx % tot_sec_filter;
37 *pri_strength = pri_idx;
38 *sec_strength = sec_idx;
39 if (pick_method == CDEF_FULL_SEARCH) return;
40
41 switch (pick_method) {
42 case CDEF_FAST_SEARCH_LVL1: *pri_strength = priconv_lvl1[pri_idx]; break;
43 case CDEF_FAST_SEARCH_LVL2: *pri_strength = priconv_lvl2[pri_idx]; break;
44 case CDEF_FAST_SEARCH_LVL3:
45 *pri_strength = priconv_lvl2[pri_idx];
46 *sec_strength = secconv_lvl3[sec_idx];
47 break;
48 case CDEF_FAST_SEARCH_LVL4:
49 *pri_strength = priconv_lvl4[pri_idx];
50 *sec_strength = secconv_lvl3[sec_idx];
51 break;
52 default: assert(0 && "Invalid CDEF search method");
53 }
54 }
55
// Converts the strength index `strength_idx` (as produced by the search method
// `pick_method`) into a packed CDEF strength and stores it in `cdef_strength`:
//   cdef_strength = pri_strength * CDEF_SEC_STRENGTHS + sec_strength
//
// The expansion uses two int variables named `pri_strength` and `sec_strength`
// as scratch storage; both must be declared in the scope where the macro is
// used. The body is wrapped in do { } while (0) so that the macro acts as a
// single statement (for example as the body of an unbraced if/else); invoke it
// with a trailing semicolon.
#define STORE_CDEF_FILTER_STRENGTH(cdef_strength, pick_method, strength_idx) \
  do {                                                                       \
    get_cdef_filter_strengths((pick_method), &pri_strength, &sec_strength,   \
                              (strength_idx));                               \
    (cdef_strength) = pri_strength * CDEF_SEC_STRENGTHS + sec_strength;      \
  } while (0)
62
63 /* Search for the best strength to add as an option, knowing we
64 already selected nb_strengths options. */
search_one(int * lev,int nb_strengths,uint64_t mse[][TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)65 static uint64_t search_one(int *lev, int nb_strengths,
66 uint64_t mse[][TOTAL_STRENGTHS], int sb_count,
67 CDEF_PICK_METHOD pick_method) {
68 uint64_t tot_mse[TOTAL_STRENGTHS];
69 const int total_strengths = nb_cdef_strengths[pick_method];
70 int i, j;
71 uint64_t best_tot_mse = (uint64_t)1 << 63;
72 int best_id = 0;
73 memset(tot_mse, 0, sizeof(tot_mse));
74 for (i = 0; i < sb_count; i++) {
75 int gi;
76 uint64_t best_mse = (uint64_t)1 << 63;
77 /* Find best mse among already selected options. */
78 for (gi = 0; gi < nb_strengths; gi++) {
79 if (mse[i][lev[gi]] < best_mse) {
80 best_mse = mse[i][lev[gi]];
81 }
82 }
83 /* Find best mse when adding each possible new option. */
84 for (j = 0; j < total_strengths; j++) {
85 uint64_t best = best_mse;
86 if (mse[i][j] < best) best = mse[i][j];
87 tot_mse[j] += best;
88 }
89 }
90 for (j = 0; j < total_strengths; j++) {
91 if (tot_mse[j] < best_tot_mse) {
92 best_tot_mse = tot_mse[j];
93 best_id = j;
94 }
95 }
96 lev[nb_strengths] = best_id;
97 return best_tot_mse;
98 }
99
100 /* Search for the best luma+chroma strength to add as an option, knowing we
101 already selected nb_strengths options. */
search_one_dual(int * lev0,int * lev1,int nb_strengths,uint64_t (** mse)[TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)102 static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
103 uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
104 CDEF_PICK_METHOD pick_method) {
105 uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
106 int i, j;
107 uint64_t best_tot_mse = (uint64_t)1 << 63;
108 int best_id0 = 0;
109 int best_id1 = 0;
110 const int total_strengths = nb_cdef_strengths[pick_method];
111 memset(tot_mse, 0, sizeof(tot_mse));
112 for (i = 0; i < sb_count; i++) {
113 int gi;
114 uint64_t best_mse = (uint64_t)1 << 63;
115 /* Find best mse among already selected options. */
116 for (gi = 0; gi < nb_strengths; gi++) {
117 uint64_t curr = mse[0][i][lev0[gi]];
118 curr += mse[1][i][lev1[gi]];
119 if (curr < best_mse) {
120 best_mse = curr;
121 }
122 }
123 /* Find best mse when adding each possible new option. */
124 for (j = 0; j < total_strengths; j++) {
125 int k;
126 for (k = 0; k < total_strengths; k++) {
127 uint64_t best = best_mse;
128 uint64_t curr = mse[0][i][j];
129 curr += mse[1][i][k];
130 if (curr < best) best = curr;
131 tot_mse[j][k] += best;
132 }
133 }
134 }
135 for (j = 0; j < total_strengths; j++) {
136 int k;
137 for (k = 0; k < total_strengths; k++) {
138 if (tot_mse[j][k] < best_tot_mse) {
139 best_tot_mse = tot_mse[j][k];
140 best_id0 = j;
141 best_id1 = k;
142 }
143 }
144 }
145 lev0[nb_strengths] = best_id0;
146 lev1[nb_strengths] = best_id1;
147 return best_tot_mse;
148 }
149
150 /* Search for the set of strengths that minimizes mse. */
joint_strength_search(int * best_lev,int nb_strengths,uint64_t mse[][TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)151 static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
152 uint64_t mse[][TOTAL_STRENGTHS],
153 int sb_count,
154 CDEF_PICK_METHOD pick_method) {
155 uint64_t best_tot_mse;
156 int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
157 pick_method <= CDEF_FAST_SEARCH_LVL4);
158 int i;
159 best_tot_mse = (uint64_t)1 << 63;
160 /* Greedy search: add one strength options at a time. */
161 for (i = 0; i < nb_strengths; i++) {
162 best_tot_mse = search_one(best_lev, i, mse, sb_count, pick_method);
163 }
164 /* Trying to refine the greedy search by reconsidering each
165 already-selected option. */
166 if (!fast) {
167 for (i = 0; i < 4 * nb_strengths; i++) {
168 int j;
169 for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
170 best_tot_mse =
171 search_one(best_lev, nb_strengths - 1, mse, sb_count, pick_method);
172 }
173 }
174 return best_tot_mse;
175 }
176
177 /* Search for the set of luma+chroma strengths that minimizes mse. */
joint_strength_search_dual(int * best_lev0,int * best_lev1,int nb_strengths,uint64_t (** mse)[TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)178 static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
179 int nb_strengths,
180 uint64_t (**mse)[TOTAL_STRENGTHS],
181 int sb_count,
182 CDEF_PICK_METHOD pick_method) {
183 uint64_t best_tot_mse;
184 int i;
185 best_tot_mse = (uint64_t)1 << 63;
186 /* Greedy search: add one strength options at a time. */
187 for (i = 0; i < nb_strengths; i++) {
188 best_tot_mse =
189 search_one_dual(best_lev0, best_lev1, i, mse, sb_count, pick_method);
190 }
191 /* Trying to refine the greedy search by reconsidering each
192 already-selected option. */
193 for (i = 0; i < 4 * nb_strengths; i++) {
194 int j;
195 for (j = 0; j < nb_strengths - 1; j++) {
196 best_lev0[j] = best_lev0[j + 1];
197 best_lev1[j] = best_lev1[j + 1];
198 }
199 best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse,
200 sb_count, pick_method);
201 }
202 return best_tot_mse;
203 }
204
#if CONFIG_AV1_HIGHBITDEPTH
// Copies a vsize x hsize rectangle of 16-bit samples into dst.
//
// src is first converted with CONVERT_TO_SHORTPTR(); the rectangle starts at
// row src_voffset, column src_hoffset of that buffer, whose row pitch is
// sstride samples. Rows are written to dst with a pitch of dstride samples.
static void copy_sb16_16_highbd(uint16_t *dst, int dstride, const void *src,
                                int src_voffset, int src_hoffset, int sstride,
                                int vsize, int hsize) {
  const uint16_t *const origin = CONVERT_TO_SHORTPTR((uint8_t *)src) +
                                 src_voffset * sstride + src_hoffset;
  const size_t row_bytes = hsize * sizeof(*origin);
  for (int row = 0; row < vsize; ++row) {
    memcpy(&dst[row * dstride], &origin[row * sstride], row_bytes);
  }
}
#endif
216
// Copies a vsize x hsize rectangle of 8-bit samples into a 16-bit buffer,
// widening every sample to uint16_t.
//
// src is read as an array of uint8_t with a row pitch of sstride; the
// rectangle starts at row src_voffset, column src_hoffset. Rows are written to
// dst with a pitch of dstride samples. src is only read, so it is accessed
// through a pointer to const.
static void copy_sb16_16(uint16_t *dst, int dstride, const void *src,
                         int src_voffset, int src_hoffset, int sstride,
                         int vsize, int hsize) {
  const uint8_t *const base =
      (const uint8_t *)src + src_voffset * sstride + src_hoffset;
  for (int r = 0; r < vsize; r++) {
    uint16_t *const dst_row = dst + r * dstride;
    const uint8_t *const src_row = base + r * sstride;
    for (int c = 0; c < hsize; c++) dst_row[c] = (uint16_t)src_row[c];
  }
}
227
init_src_params(int * src_stride,int * width,int * height,int * width_log2,int * height_log2,BLOCK_SIZE bsize)228 static INLINE void init_src_params(int *src_stride, int *width, int *height,
229 int *width_log2, int *height_log2,
230 BLOCK_SIZE bsize) {
231 *src_stride = block_size_wide[bsize];
232 *width = block_size_wide[bsize];
233 *height = block_size_high[bsize];
234 *width_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize];
235 *height_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize];
236 }
#if CONFIG_AV1_HIGHBITDEPTH
/* Accumulates the error between a 16-bit frame buffer and the filtered
 * blocks, visiting only the blocks listed in dlist.
 *
 * dst is converted with CONVERT_TO_SHORTPTR() and offset to (row, col). For
 * entry n of dlist, the frame block at (by << height_log2, bx << width_log2)
 * is compared against the n-th block stored back to back in src, using
 * aom_mse_wxh_16bit_highbd(). The accumulated sum is returned right-shifted
 * by 2 * coeff_shift. */
static uint64_t compute_cdef_dist_highbd(void *dst, int dstride, uint16_t *src,
                                         cdef_list *dlist, int cdef_count,
                                         BLOCK_SIZE bsize, int coeff_shift,
                                         int row, int col) {
  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
         bsize == BLOCK_8X8);
  int src_stride, width, height, width_log2, height_log2;
  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
                  bsize);

  uint16_t *const frame_origin =
      CONVERT_TO_SHORTPTR((uint8_t *)dst) + row * dstride + col;
  const int block_log2 = height_log2 + width_log2;
  uint64_t total = 0;
  for (int n = 0; n < cdef_count; ++n) {
    const int y = dlist[n].by << height_log2;
    const int x = dlist[n].bx << width_log2;
    total += aom_mse_wxh_16bit_highbd(&frame_origin[y * dstride + x], dstride,
                                      &src[n << block_log2], src_stride, width,
                                      height);
  }
  return total >> (2 * coeff_shift);
}
#endif
/* Accumulates the error between an 8-bit frame buffer and the filtered
 * blocks, visiting only the blocks listed in dlist.
 *
 * dst is read as uint8_t samples and offset to (row, col). For entry n of
 * dlist, the frame block at (by << height_log2, bx << width_log2) is compared
 * against the n-th block stored back to back in src, using
 * aom_mse_wxh_16bit(). The accumulated sum is returned right-shifted by
 * 2 * coeff_shift. */
static uint64_t compute_cdef_dist(void *dst, int dstride, uint16_t *src,
                                  cdef_list *dlist, int cdef_count,
                                  BLOCK_SIZE bsize, int coeff_shift, int row,
                                  int col) {
  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
         bsize == BLOCK_8X8);
  int src_stride, width, height, width_log2, height_log2;
  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
                  bsize);

  uint8_t *const frame_origin = (uint8_t *)dst + row * dstride + col;
  const int block_log2 = height_log2 + width_log2;
  uint64_t total = 0;
  for (int n = 0; n < cdef_count; ++n) {
    const int y = dlist[n].by << height_log2;
    const int x = dlist[n].bx << width_log2;
    total += aom_mse_wxh_16bit(&frame_origin[y * dstride + x], dstride,
                               &src[n << block_log2], src_stride, width,
                               height);
  }
  return total >> (2 * coeff_shift);
}
284
285 // Calculates MSE at block level.
286 // Inputs:
287 // cdef_search_ctx: Pointer to the structure containing parameters related to
288 // CDEF search context.
289 // fbr: Row index in units of 64x64 block
290 // fbc: Column index in units of 64x64 block
291 // Returns:
292 // Nothing will be returned. Contents of cdef_search_ctx will be modified.
av1_cdef_mse_calc_block(CdefSearchCtx * cdef_search_ctx,int fbr,int fbc,int sb_count)293 void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx, int fbr, int fbc,
294 int sb_count) {
295 const CommonModeInfoParams *const mi_params = cdef_search_ctx->mi_params;
296 const YV12_BUFFER_CONFIG *ref = cdef_search_ctx->ref;
297 const int coeff_shift = cdef_search_ctx->coeff_shift;
298 const int *mi_wide_l2 = cdef_search_ctx->mi_wide_l2;
299 const int *mi_high_l2 = cdef_search_ctx->mi_high_l2;
300
301 // Declare and initialize the temporary buffers.
302 DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);
303 DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
304 cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
305 int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
306 int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
307 uint16_t *const in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
308 int nhb = AOMMIN(MI_SIZE_64X64, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
309 int nvb = AOMMIN(MI_SIZE_64X64, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
310 int hb_step = 1, vb_step = 1;
311 BLOCK_SIZE bs;
312
313 const MB_MODE_INFO *const mbmi =
314 mi_params->mi_grid_base[MI_SIZE_64X64 * fbr * mi_params->mi_stride +
315 MI_SIZE_64X64 * fbc];
316
317 uint8_t *ref_buffer[MAX_MB_PLANE] = { ref->y_buffer, ref->u_buffer,
318 ref->v_buffer };
319 int ref_stride[MAX_MB_PLANE] = { ref->y_stride, ref->uv_stride,
320 ref->uv_stride };
321
322 if (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64 ||
323 mbmi->bsize == BLOCK_64X128) {
324 bs = mbmi->bsize;
325 if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
326 nhb = AOMMIN(MI_SIZE_128X128, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
327 hb_step = 2;
328 }
329 if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
330 nvb = AOMMIN(MI_SIZE_128X128, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
331 vb_step = 2;
332 }
333 } else {
334 bs = BLOCK_64X64;
335 }
336 // Get number of 8x8 blocks which are not skip. Cdef processing happens for
337 // 8x8 blocks which are not skip.
338 const int cdef_count = av1_cdef_compute_sb_list(
339 mi_params, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs);
340
341 const int yoff = CDEF_VBORDER * (fbr != 0);
342 const int xoff = CDEF_HBORDER * (fbc != 0);
343 int dirinit = 0;
344 for (int pli = 0; pli < cdef_search_ctx->num_planes; pli++) {
345 for (int i = 0; i < CDEF_INBUF_SIZE; i++) inbuf[i] = CDEF_VERY_LARGE;
346 /* We avoid filtering the pixels for which some of the pixels to
347 average are outside the frame. We could change the filter instead,
348 but it would add special cases for any future vectorization. */
349 const int ysize = (nvb << mi_high_l2[pli]) +
350 CDEF_VBORDER * (fbr + vb_step < cdef_search_ctx->nvfb) +
351 yoff;
352 const int xsize = (nhb << mi_wide_l2[pli]) +
353 CDEF_HBORDER * (fbc + hb_step < cdef_search_ctx->nhfb) +
354 xoff;
355 const int row = fbr * MI_SIZE_64X64 << mi_high_l2[pli];
356 const int col = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
357 struct macroblockd_plane pd = cdef_search_ctx->plane[pli];
358 cdef_search_ctx->copy_fn(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
359 pd.dst.buf, row - yoff, col - xoff, pd.dst.stride,
360 ysize, xsize);
361 for (int gi = 0; gi < cdef_search_ctx->total_strengths; gi++) {
362 int pri_strength, sec_strength;
363 get_cdef_filter_strengths(cdef_search_ctx->pick_method, &pri_strength,
364 &sec_strength, gi);
365 av1_cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in,
366 cdef_search_ctx->xdec[pli], cdef_search_ctx->ydec[pli],
367 dir, &dirinit, var, pli, dlist, cdef_count,
368 pri_strength, sec_strength + (sec_strength == 3),
369 cdef_search_ctx->damping, coeff_shift);
370 const uint64_t curr_mse = cdef_search_ctx->compute_cdef_dist_fn(
371 ref_buffer[pli], ref_stride[pli], tmp_dst, dlist, cdef_count,
372 cdef_search_ctx->bsize[pli], coeff_shift, row, col);
373 if (pli < 2)
374 cdef_search_ctx->mse[pli][sb_count][gi] = curr_mse;
375 else
376 cdef_search_ctx->mse[1][sb_count][gi] += curr_mse;
377 }
378 }
379 cdef_search_ctx->sb_index[sb_count] =
380 MI_SIZE_64X64 * fbr * mi_params->mi_stride + MI_SIZE_64X64 * fbc;
381 }
382
383 // MSE calculation at frame level.
384 // Inputs:
385 // cdef_search_ctx: Pointer to the structure containing parameters related to
386 // CDEF search context.
387 // Returns:
388 // Nothing will be returned. Contents of cdef_search_ctx will be modified.
cdef_mse_calc_frame(CdefSearchCtx * cdef_search_ctx)389 static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx) {
390 // Loop over each sb.
391 for (int fbr = 0; fbr < cdef_search_ctx->nvfb; ++fbr) {
392 for (int fbc = 0; fbc < cdef_search_ctx->nhfb; ++fbc) {
393 // Checks if cdef processing can be skipped for particular sb.
394 if (cdef_sb_skip(cdef_search_ctx->mi_params, fbr, fbc)) continue;
395 // Calculate mse for each sb and store the relevant sb index.
396 av1_cdef_mse_calc_block(cdef_search_ctx, fbr, fbc,
397 cdef_search_ctx->sb_count);
398 cdef_search_ctx->sb_count++;
399 }
400 }
401 }
402
403 // Allocates memory for members of CdefSearchCtx.
404 // Inputs:
405 // cdef_search_ctx: Pointer to the structure containing parameters
406 // related to CDEF search context.
407 // Returns:
408 // Nothing will be returned. Contents of cdef_search_ctx will be modified.
cdef_alloc_data(CdefSearchCtx * cdef_search_ctx)409 static AOM_INLINE void cdef_alloc_data(CdefSearchCtx *cdef_search_ctx) {
410 const int nvfb = cdef_search_ctx->nvfb;
411 const int nhfb = cdef_search_ctx->nhfb;
412 cdef_search_ctx->sb_index =
413 aom_malloc(nvfb * nhfb * sizeof(cdef_search_ctx->sb_index));
414 cdef_search_ctx->sb_count = 0;
415 cdef_search_ctx->mse[0] =
416 aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb);
417 cdef_search_ctx->mse[1] =
418 aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb);
419 }
420
421 // Deallocates the memory allocated for members of CdefSearchCtx.
422 // Inputs:
423 // cdef_search_ctx: Pointer to the structure containing parameters
424 // related to CDEF search context.
425 // Returns:
426 // Nothing will be returned.
cdef_dealloc_data(CdefSearchCtx * cdef_search_ctx)427 static AOM_INLINE void cdef_dealloc_data(CdefSearchCtx *cdef_search_ctx) {
428 aom_free(cdef_search_ctx->mse[0]);
429 aom_free(cdef_search_ctx->mse[1]);
430 aom_free(cdef_search_ctx->sb_index);
431 }
432
433 // Initialize the parameters related to CDEF search context.
434 // Inputs:
435 // frame: Pointer to compressed frame buffer
436 // ref: Pointer to the frame buffer holding the source frame
437 // cm: Pointer to top level common structure
438 // xd: Pointer to common current coding block structure
439 // cdef_search_ctx: Pointer to the structure containing parameters related to
440 // CDEF search context.
441 // pick_method: Search method used to select CDEF parameters
442 // Returns:
443 // Nothing will be returned. Contents of cdef_search_ctx will be modified.
cdef_params_init(const YV12_BUFFER_CONFIG * frame,const YV12_BUFFER_CONFIG * ref,AV1_COMMON * cm,MACROBLOCKD * xd,CdefSearchCtx * cdef_search_ctx,CDEF_PICK_METHOD pick_method)444 static AOM_INLINE void cdef_params_init(const YV12_BUFFER_CONFIG *frame,
445 const YV12_BUFFER_CONFIG *ref,
446 AV1_COMMON *cm, MACROBLOCKD *xd,
447 CdefSearchCtx *cdef_search_ctx,
448 CDEF_PICK_METHOD pick_method) {
449 const CommonModeInfoParams *const mi_params = &cm->mi_params;
450 const int num_planes = av1_num_planes(cm);
451 cdef_search_ctx->mi_params = &cm->mi_params;
452 cdef_search_ctx->ref = ref;
453 cdef_search_ctx->nvfb =
454 (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
455 cdef_search_ctx->nhfb =
456 (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
457 cdef_search_ctx->coeff_shift = AOMMAX(cm->seq_params.bit_depth - 8, 0);
458 cdef_search_ctx->damping = 3 + (cm->quant_params.base_qindex >> 6);
459 cdef_search_ctx->total_strengths = nb_cdef_strengths[pick_method];
460 cdef_search_ctx->num_planes = num_planes;
461 cdef_search_ctx->pick_method = pick_method;
462 cdef_search_ctx->sb_count = 0;
463 av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0,
464 num_planes);
465 // Initialize plane wise information.
466 for (int pli = 0; pli < num_planes; pli++) {
467 cdef_search_ctx->xdec[pli] = xd->plane[pli].subsampling_x;
468 cdef_search_ctx->ydec[pli] = xd->plane[pli].subsampling_y;
469 cdef_search_ctx->bsize[pli] =
470 cdef_search_ctx->ydec[pli]
471 ? (cdef_search_ctx->xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
472 : (cdef_search_ctx->xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
473 cdef_search_ctx->mi_wide_l2[pli] =
474 MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
475 cdef_search_ctx->mi_high_l2[pli] =
476 MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
477 cdef_search_ctx->plane[pli] = xd->plane[pli];
478 }
479 // Function pointer initialization.
480 #if CONFIG_AV1_HIGHBITDEPTH
481 if (cm->seq_params.use_highbitdepth) {
482 cdef_search_ctx->copy_fn = copy_sb16_16_highbd;
483 cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist_highbd;
484 } else {
485 cdef_search_ctx->copy_fn = copy_sb16_16;
486 cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
487 }
488 #else
489 cdef_search_ctx->copy_fn = copy_sb16_16;
490 cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
491 #endif
492 }
493
// Evaluates a * q^2 + b * q + c in single precision, rounds the result to the
// nearest integer and clamps it to [0, max_strength].
static int cdef_strength_from_q(int q, float a, float b, float c,
                                int max_strength) {
  return clamp((int)roundf(q * q * a + q * b + c), 0, max_strength);
}

// Sets the frame's CDEF parameters from the quantizer alone, without any
// search.
// A single strength pair is signalled (cdef_bits = 0, nb_cdef_strengths = 1).
// The luma and chroma primary (0..15) and secondary (0..3) strengths are
// predicted from q with quadratic fits, one set of coefficients for frames
// that are not intra-only and one for intra-only frames. The cdef_strength of
// the first mode info entry of every 64x64 block is reset to 0.
static void pick_cdef_from_qp(AV1_COMMON *const cm) {
  const int bd = cm->seq_params.bit_depth;
  const int q =
      av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >> (bd - 8);
  CdefInfo *const cdef_info = &cm->cdef_info;
  cdef_info->cdef_bits = 0;
  cdef_info->nb_cdef_strengths = 1;
  cdef_info->cdef_damping = 3 + (cm->quant_params.base_qindex >> 6);

  int y_pri, y_sec, uv_pri, uv_sec;
  aom_clear_system_state();
  if (frame_is_intra_only(cm)) {
    y_pri = cdef_strength_from_q(q, 0.0000033731974f, 0.008070594f,
                                 0.0187634f, 15);
    y_sec = cdef_strength_from_q(q, 0.0000029167343f, 0.0027798624f,
                                 0.0079405f, 3);
    uv_pri = cdef_strength_from_q(q, -0.0000130790995f, 0.012892405f,
                                  -0.00748388f, 15);
    uv_sec = cdef_strength_from_q(q, 0.0000032651783f, 0.00035520183f,
                                  0.00228092f, 3);
  } else {
    y_pri = cdef_strength_from_q(q, -0.0000023593946f, 0.0068615186f,
                                 0.02709886f, 15);
    y_sec = cdef_strength_from_q(q, -0.00000057629734f, 0.0013993345f,
                                 0.03831067f, 3);
    uv_pri = cdef_strength_from_q(q, -0.0000007095069f, 0.0034628846f,
                                  0.00887099f, 15);
    uv_sec = cdef_strength_from_q(q, 0.00000023874085f, 0.00028223585f,
                                  0.05576307f, 3);
  }
  // Pack primary and secondary strength into one value per plane group.
  cdef_info->cdef_strengths[0] = y_pri * CDEF_SEC_STRENGTHS + y_sec;
  cdef_info->cdef_uv_strengths[0] = uv_pri * CDEF_SEC_STRENGTHS + uv_sec;

  // Every 64x64 block uses strength slot 0.
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  for (int r = 0; r < nvfb; ++r) {
    MB_MODE_INFO **const fb_row =
        mi_params->mi_grid_base + r * (MI_SIZE_64X64 * mi_params->mi_stride);
    for (int c = 0; c < nhfb; ++c) {
      fb_row[MI_SIZE_64X64 * c]->cdef_strength = 0;
    }
  }
}
551
// Chooses the CDEF parameters of a frame and stores them in cm->cdef_info and
// in the mode info grid.
// Inputs:
//   mt_info: Multi-thread information; when num_workers > 1 the error
//   calculation is done by av1_cdef_mse_calc_frame_mt().
//   frame: Pointer to compressed frame buffer
//   ref: Pointer to the frame buffer holding the source frame
//   cm: Pointer to top level common structure
//   xd: Pointer to common current coding block structure
//   pick_method: Search method used to select CDEF parameters.
//   CDEF_PICK_FROM_Q bypasses the search and derives the strengths from the
//   quantizer.
//   rdmult: Multiplier passed to RDCOST() when trading rate against distortion
// Returns:
//   Nothing will be returned. cm->cdef_info (cdef_bits, nb_cdef_strengths,
//   cdef_strengths, cdef_uv_strengths, cdef_damping) and the cdef_strength
//   field of the searched blocks are updated.
void av1_cdef_search(MultiThreadInfo *mt_info, const YV12_BUFFER_CONFIG *frame,
                     const YV12_BUFFER_CONFIG *ref, AV1_COMMON *cm,
                     MACROBLOCKD *xd, CDEF_PICK_METHOD pick_method,
                     int rdmult) {
  if (pick_method == CDEF_PICK_FROM_Q) {
    pick_cdef_from_qp(cm);
    return;
  }
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int damping = 3 + (cm->quant_params.base_qindex >> 6);
  const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
                    pick_method <= CDEF_FAST_SEARCH_LVL4);
  const int num_planes = av1_num_planes(cm);
  CdefSearchCtx cdef_search_ctx;
  // Initialize parameters related to CDEF search context.
  cdef_params_init(frame, ref, cm, xd, &cdef_search_ctx, pick_method);
  // Allocate CDEF search context buffers.
  // NOTE(review): the buffers are used below without a NULL check.
  cdef_alloc_data(&cdef_search_ctx);
  // Frame level mse calculation.
  if (mt_info->num_workers > 1) {
    av1_cdef_mse_calc_frame_mt(cm, mt_info, &cdef_search_ctx);
  } else {
    cdef_mse_calc_frame(&cdef_search_ctx);
  }

  /* Search for different number of signaling bits: with i bits per block,
     1 << i strength options can be signalled for the frame. */
  int nb_strength_bits = 0;
  uint64_t best_rd = UINT64_MAX;
  CdefInfo *const cdef_info = &cm->cdef_info;
  int sb_count = cdef_search_ctx.sb_count;
  uint64_t(*mse[2])[TOTAL_STRENGTHS];
  mse[0] = cdef_search_ctx.mse[0];
  mse[1] = cdef_search_ctx.mse[1];
  for (int i = 0; i <= 3; i++) {
    int best_lev0[CDEF_MAX_STRENGTHS];
    int best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
    const int nb_strengths = 1 << i;
    uint64_t tot_mse;
    if (num_planes > 1) {
      tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
                                           mse, sb_count, pick_method);
    } else {
      tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count,
                                      pick_method);
    }

    // Rate: i bits per searched block plus the strength table itself, which
    // is twice as large when chroma strengths are present.
    const int total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS *
                                              (num_planes > 1 ? 2 : 1);
    const int rate_cost = av1_cost_literal(total_bits);
    const uint64_t dist = tot_mse * 16;
    const uint64_t rd = RDCOST(rdmult, rate_cost, dist);
    if (rd < best_rd) {
      best_rd = rd;
      nb_strength_bits = i;
      memcpy(cdef_info->cdef_strengths, best_lev0,
             nb_strengths * sizeof(best_lev0[0]));
      if (num_planes > 1) {
        memcpy(cdef_info->cdef_uv_strengths, best_lev1,
               nb_strengths * sizeof(best_lev1[0]));
      }
    }
  }

  cdef_info->cdef_bits = nb_strength_bits;
  cdef_info->nb_cdef_strengths = 1 << nb_strength_bits;

  // For every searched block, pick the option with the lowest error (lowest
  // option index on ties).
  for (int i = 0; i < sb_count; i++) {
    uint64_t best_mse = UINT64_MAX;
    int best_gi = 0;
    for (int gi = 0; gi < cdef_info->nb_cdef_strengths; gi++) {
      uint64_t curr = mse[0][i][cdef_info->cdef_strengths[gi]];
      if (num_planes > 1) curr += mse[1][i][cdef_info->cdef_uv_strengths[gi]];
      if (curr < best_mse) {
        best_gi = gi;
        best_mse = curr;
      }
    }
    mi_params->mi_grid_base[cdef_search_ctx.sb_index[i]]->cdef_strength =
        best_gi;
  }

  // The fast methods searched over reduced index sets; convert the selected
  // indices into packed strength values.
  if (fast) {
    for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
      int pri_strength, sec_strength;
      const int luma_strength = cdef_info->cdef_strengths[j];
      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_strengths[j], pick_method,
                                 luma_strength);
      // cdef_uv_strengths is written by the search above only when chroma
      // planes exist. Without them it may hold values left by an earlier call
      // (for example packed strengths), which must not be used as indices
      // into the fast-search lookup tables.
      if (num_planes > 1) {
        const int chroma_strength = cdef_info->cdef_uv_strengths[j];
        STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_uv_strengths[j], pick_method,
                                   chroma_strength);
      }
    }
  }

  cdef_info->cdef_damping = damping;
  // Deallocate CDEF search context buffers.
  cdef_dealloc_data(&cdef_search_ctx);
}
649