1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <math.h>
13 #include <string.h>
14 
15 #include "config/aom_dsp_rtcd.h"
16 #include "config/aom_scale_rtcd.h"
17 
18 #include "aom/aom_integer.h"
19 #include "aom_ports/system_state.h"
20 #include "av1/common/av1_common_int.h"
21 #include "av1/common/reconinter.h"
22 #include "av1/encoder/encoder.h"
23 #include "av1/encoder/ethread.h"
24 #include "av1/encoder/pickcdef.h"
25 
26 // Get primary and secondary filter strength for the given strength index and
27 // search method
get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method,int * pri_strength,int * sec_strength,int strength_idx)28 static INLINE void get_cdef_filter_strengths(CDEF_PICK_METHOD pick_method,
29                                              int *pri_strength,
30                                              int *sec_strength,
31                                              int strength_idx) {
32   const int tot_sec_filter = (pick_method >= CDEF_FAST_SEARCH_LVL3)
33                                  ? REDUCED_SEC_STRENGTHS_LVL3
34                                  : CDEF_SEC_STRENGTHS;
35   const int pri_idx = strength_idx / tot_sec_filter;
36   const int sec_idx = strength_idx % tot_sec_filter;
37   *pri_strength = pri_idx;
38   *sec_strength = sec_idx;
39   if (pick_method == CDEF_FULL_SEARCH) return;
40 
41   switch (pick_method) {
42     case CDEF_FAST_SEARCH_LVL1: *pri_strength = priconv_lvl1[pri_idx]; break;
43     case CDEF_FAST_SEARCH_LVL2: *pri_strength = priconv_lvl2[pri_idx]; break;
44     case CDEF_FAST_SEARCH_LVL3:
45       *pri_strength = priconv_lvl2[pri_idx];
46       *sec_strength = secconv_lvl3[sec_idx];
47       break;
48     case CDEF_FAST_SEARCH_LVL4:
49       *pri_strength = priconv_lvl4[pri_idx];
50       *sec_strength = secconv_lvl3[sec_idx];
51       break;
52     default: assert(0 && "Invalid CDEF search method");
53   }
54 }
55 
// Store CDEF filter strength calculated from strength index for given search
// method.
//   cdef_strength: lvalue that receives
//                  pri_strength * CDEF_SEC_STRENGTHS + sec_strength
//   pick_method:   search method passed on to get_cdef_filter_strengths()
//   strength_idx:  strength index passed on to get_cdef_filter_strengths()
// The expansion uses two int variables named pri_strength and sec_strength
// that must be declared in the enclosing scope; both are overwritten.
// The body is wrapped in do { } while (0) so that the macro behaves as a
// single statement (safe under an unbraced if/else); the caller supplies the
// trailing semicolon.
#define STORE_CDEF_FILTER_STRENGTH(cdef_strength, pick_method, strength_idx) \
  do {                                                                       \
    get_cdef_filter_strengths((pick_method), &pri_strength, &sec_strength,   \
                              (strength_idx));                               \
    (cdef_strength) = pri_strength * CDEF_SEC_STRENGTHS + sec_strength;      \
  } while (0)
62 
63 /* Search for the best strength to add as an option, knowing we
64    already selected nb_strengths options. */
search_one(int * lev,int nb_strengths,uint64_t mse[][TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)65 static uint64_t search_one(int *lev, int nb_strengths,
66                            uint64_t mse[][TOTAL_STRENGTHS], int sb_count,
67                            CDEF_PICK_METHOD pick_method) {
68   uint64_t tot_mse[TOTAL_STRENGTHS];
69   const int total_strengths = nb_cdef_strengths[pick_method];
70   int i, j;
71   uint64_t best_tot_mse = (uint64_t)1 << 63;
72   int best_id = 0;
73   memset(tot_mse, 0, sizeof(tot_mse));
74   for (i = 0; i < sb_count; i++) {
75     int gi;
76     uint64_t best_mse = (uint64_t)1 << 63;
77     /* Find best mse among already selected options. */
78     for (gi = 0; gi < nb_strengths; gi++) {
79       if (mse[i][lev[gi]] < best_mse) {
80         best_mse = mse[i][lev[gi]];
81       }
82     }
83     /* Find best mse when adding each possible new option. */
84     for (j = 0; j < total_strengths; j++) {
85       uint64_t best = best_mse;
86       if (mse[i][j] < best) best = mse[i][j];
87       tot_mse[j] += best;
88     }
89   }
90   for (j = 0; j < total_strengths; j++) {
91     if (tot_mse[j] < best_tot_mse) {
92       best_tot_mse = tot_mse[j];
93       best_id = j;
94     }
95   }
96   lev[nb_strengths] = best_id;
97   return best_tot_mse;
98 }
99 
100 /* Search for the best luma+chroma strength to add as an option, knowing we
101    already selected nb_strengths options. */
search_one_dual(int * lev0,int * lev1,int nb_strengths,uint64_t (** mse)[TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)102 static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths,
103                                 uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count,
104                                 CDEF_PICK_METHOD pick_method) {
105   uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS];
106   int i, j;
107   uint64_t best_tot_mse = (uint64_t)1 << 63;
108   int best_id0 = 0;
109   int best_id1 = 0;
110   const int total_strengths = nb_cdef_strengths[pick_method];
111   memset(tot_mse, 0, sizeof(tot_mse));
112   for (i = 0; i < sb_count; i++) {
113     int gi;
114     uint64_t best_mse = (uint64_t)1 << 63;
115     /* Find best mse among already selected options. */
116     for (gi = 0; gi < nb_strengths; gi++) {
117       uint64_t curr = mse[0][i][lev0[gi]];
118       curr += mse[1][i][lev1[gi]];
119       if (curr < best_mse) {
120         best_mse = curr;
121       }
122     }
123     /* Find best mse when adding each possible new option. */
124     for (j = 0; j < total_strengths; j++) {
125       int k;
126       for (k = 0; k < total_strengths; k++) {
127         uint64_t best = best_mse;
128         uint64_t curr = mse[0][i][j];
129         curr += mse[1][i][k];
130         if (curr < best) best = curr;
131         tot_mse[j][k] += best;
132       }
133     }
134   }
135   for (j = 0; j < total_strengths; j++) {
136     int k;
137     for (k = 0; k < total_strengths; k++) {
138       if (tot_mse[j][k] < best_tot_mse) {
139         best_tot_mse = tot_mse[j][k];
140         best_id0 = j;
141         best_id1 = k;
142       }
143     }
144   }
145   lev0[nb_strengths] = best_id0;
146   lev1[nb_strengths] = best_id1;
147   return best_tot_mse;
148 }
149 
150 /* Search for the set of strengths that minimizes mse. */
joint_strength_search(int * best_lev,int nb_strengths,uint64_t mse[][TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)151 static uint64_t joint_strength_search(int *best_lev, int nb_strengths,
152                                       uint64_t mse[][TOTAL_STRENGTHS],
153                                       int sb_count,
154                                       CDEF_PICK_METHOD pick_method) {
155   uint64_t best_tot_mse;
156   int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
157               pick_method <= CDEF_FAST_SEARCH_LVL4);
158   int i;
159   best_tot_mse = (uint64_t)1 << 63;
160   /* Greedy search: add one strength options at a time. */
161   for (i = 0; i < nb_strengths; i++) {
162     best_tot_mse = search_one(best_lev, i, mse, sb_count, pick_method);
163   }
164   /* Trying to refine the greedy search by reconsidering each
165      already-selected option. */
166   if (!fast) {
167     for (i = 0; i < 4 * nb_strengths; i++) {
168       int j;
169       for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1];
170       best_tot_mse =
171           search_one(best_lev, nb_strengths - 1, mse, sb_count, pick_method);
172     }
173   }
174   return best_tot_mse;
175 }
176 
177 /* Search for the set of luma+chroma strengths that minimizes mse. */
joint_strength_search_dual(int * best_lev0,int * best_lev1,int nb_strengths,uint64_t (** mse)[TOTAL_STRENGTHS],int sb_count,CDEF_PICK_METHOD pick_method)178 static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1,
179                                            int nb_strengths,
180                                            uint64_t (**mse)[TOTAL_STRENGTHS],
181                                            int sb_count,
182                                            CDEF_PICK_METHOD pick_method) {
183   uint64_t best_tot_mse;
184   int i;
185   best_tot_mse = (uint64_t)1 << 63;
186   /* Greedy search: add one strength options at a time. */
187   for (i = 0; i < nb_strengths; i++) {
188     best_tot_mse =
189         search_one_dual(best_lev0, best_lev1, i, mse, sb_count, pick_method);
190   }
191   /* Trying to refine the greedy search by reconsidering each
192      already-selected option. */
193   for (i = 0; i < 4 * nb_strengths; i++) {
194     int j;
195     for (j = 0; j < nb_strengths - 1; j++) {
196       best_lev0[j] = best_lev0[j + 1];
197       best_lev1[j] = best_lev1[j + 1];
198     }
199     best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse,
200                                    sb_count, pick_method);
201   }
202   return best_tot_mse;
203 }
204 
#if CONFIG_AV1_HIGHBITDEPTH
// Copies a vsize x hsize window of 16-bit samples into dst, one row at a
// time. src is turned into a uint16_t pointer with CONVERT_TO_SHORTPTR; the
// window starts at row src_voffset, column src_hoffset of that buffer.
// dstride and sstride are the row pitches of dst and src, in samples.
static void copy_sb16_16_highbd(uint16_t *dst, int dstride, const void *src,
                                int src_voffset, int src_hoffset, int sstride,
                                int vsize, int hsize) {
  const uint16_t *const frame16 = CONVERT_TO_SHORTPTR((uint8_t *)src);
  const uint16_t *const origin = &frame16[src_voffset * sstride + src_hoffset];
  const size_t row_bytes = hsize * sizeof(*origin);
  for (int row = 0; row < vsize; ++row) {
    memcpy(&dst[row * dstride], &origin[row * sstride], row_bytes);
  }
}
#endif
216 
// Copies a vsize x hsize window of 8-bit samples from src into the 16-bit
// buffer dst, widening each sample to uint16_t. The window starts at row
// src_voffset, column src_hoffset of src. dstride and sstride are the row
// pitches of dst and src, in samples.
static void copy_sb16_16(uint16_t *dst, int dstride, const void *src,
                         int src_voffset, int src_hoffset, int sstride,
                         int vsize, int hsize) {
  const uint8_t *const pixels = (const uint8_t *)src;
  const uint8_t *const origin = &pixels[src_voffset * sstride + src_hoffset];
  for (int row = 0; row < vsize; ++row) {
    const uint8_t *const in_row = &origin[row * sstride];
    uint16_t *const out_row = &dst[row * dstride];
    for (int col = 0; col < hsize; ++col) {
      out_row[col] = (uint16_t)in_row[col];
    }
  }
}
227 
init_src_params(int * src_stride,int * width,int * height,int * width_log2,int * height_log2,BLOCK_SIZE bsize)228 static INLINE void init_src_params(int *src_stride, int *width, int *height,
229                                    int *width_log2, int *height_log2,
230                                    BLOCK_SIZE bsize) {
231   *src_stride = block_size_wide[bsize];
232   *width = block_size_wide[bsize];
233   *height = block_size_high[bsize];
234   *width_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize];
235   *height_log2 = MI_SIZE_LOG2 + mi_size_wide_log2[bsize];
236 }
#if CONFIG_AV1_HIGHBITDEPTH
/* Sums aom_mse_wxh_16bit_highbd() over the cdef_count blocks listed in
   dlist, i.e. only over the blocks that were filtered. dst is turned into a
   uint16_t pointer with CONVERT_TO_SHORTPTR and offset to (row, col); block n
   is read from src at offset n << (height_log2 + width_log2). The sum is
   shifted right by 2 * coeff_shift before it is returned. */
static uint64_t compute_cdef_dist_highbd(void *dst, int dstride, uint16_t *src,
                                         cdef_list *dlist, int cdef_count,
                                         BLOCK_SIZE bsize, int coeff_shift,
                                         int row, int col) {
  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
         bsize == BLOCK_8X8);
  int src_stride, width, height, width_log2, height_log2;
  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
                  bsize);

  uint16_t *const frame16 = CONVERT_TO_SHORTPTR((uint8_t *)dst);
  uint16_t *const origin = &frame16[row * dstride + col];
  const int blk_shift = height_log2 + width_log2;

  uint64_t total = 0;
  for (int n = 0; n < cdef_count; n++) {
    const int blk_row = dlist[n].by << height_log2;
    const int blk_col = dlist[n].bx << width_log2;
    total += aom_mse_wxh_16bit_highbd(&origin[blk_row * dstride + blk_col],
                                      dstride, &src[n << blk_shift],
                                      src_stride, width, height);
  }
  return total >> (2 * coeff_shift);
}
#endif
/* Sums aom_mse_wxh_16bit() over the cdef_count blocks listed in dlist, i.e.
   only over the blocks that were filtered. dst is treated as 8-bit samples
   and offset to (row, col); block n is read from src at offset
   n << (height_log2 + width_log2). The sum is shifted right by
   2 * coeff_shift before it is returned. */
static uint64_t compute_cdef_dist(void *dst, int dstride, uint16_t *src,
                                  cdef_list *dlist, int cdef_count,
                                  BLOCK_SIZE bsize, int coeff_shift, int row,
                                  int col) {
  assert(bsize == BLOCK_4X4 || bsize == BLOCK_4X8 || bsize == BLOCK_8X4 ||
         bsize == BLOCK_8X8);
  int src_stride, width, height, width_log2, height_log2;
  init_src_params(&src_stride, &width, &height, &width_log2, &height_log2,
                  bsize);

  uint8_t *const frame8 = (uint8_t *)dst;
  uint8_t *const origin = &frame8[row * dstride + col];
  const int blk_shift = height_log2 + width_log2;

  uint64_t total = 0;
  for (int n = 0; n < cdef_count; n++) {
    const int blk_row = dlist[n].by << height_log2;
    const int blk_col = dlist[n].bx << width_log2;
    total += aom_mse_wxh_16bit(&origin[blk_row * dstride + blk_col], dstride,
                               &src[n << blk_shift], src_stride, width,
                               height);
  }
  return total >> (2 * coeff_shift);
}
284 
// Calculates MSE at block level.
// For one filter block, every plane is filtered with every strength of the
// search space and the resulting distortion against the reference frame is
// stored in cdef_search_ctx->mse.
// Inputs:
//   cdef_search_ctx: Pointer to the structure containing parameters related to
//   CDEF search context.
//   fbr: Row index in units of 64x64 block
//   fbc: Column index in units of 64x64 block
//   sb_count: Slot in cdef_search_ctx->mse[][] and cdef_search_ctx->sb_index[]
//   that receives the results for this block
// Returns:
//   Nothing will be returned. Contents of cdef_search_ctx will be modified.
void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx, int fbr, int fbc,
                             int sb_count) {
  const CommonModeInfoParams *const mi_params = cdef_search_ctx->mi_params;
  const YV12_BUFFER_CONFIG *ref = cdef_search_ctx->ref;
  const int coeff_shift = cdef_search_ctx->coeff_shift;
  const int *mi_wide_l2 = cdef_search_ctx->mi_wide_l2;
  const int *mi_high_l2 = cdef_search_ctx->mi_high_l2;

  // Declare and initialize the temporary buffers.
  DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]);
  DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]);
  cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128];
  int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
  int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
  // 'in' points past the top and left borders reserved inside inbuf.
  uint16_t *const in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER;
  // Size of the block in mi units, clipped to what is left of the frame.
  int nhb = AOMMIN(MI_SIZE_64X64, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
  int nvb = AOMMIN(MI_SIZE_64X64, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
  int hb_step = 1, vb_step = 1;
  BLOCK_SIZE bs;

  // Mode info of the top-left mi unit of this 64x64 block.
  const MB_MODE_INFO *const mbmi =
      mi_params->mi_grid_base[MI_SIZE_64X64 * fbr * mi_params->mi_stride +
                              MI_SIZE_64X64 * fbc];

  uint8_t *ref_buffer[MAX_MB_PLANE] = { ref->y_buffer, ref->u_buffer,
                                        ref->v_buffer };
  int ref_stride[MAX_MB_PLANE] = { ref->y_stride, ref->uv_stride,
                                   ref->uv_stride };

  // If the block at this position is 128 wide and/or 128 high, extend the
  // processed area (and the step to the next filter block) in that direction.
  if (mbmi->bsize == BLOCK_128X128 || mbmi->bsize == BLOCK_128X64 ||
      mbmi->bsize == BLOCK_64X128) {
    bs = mbmi->bsize;
    if (bs == BLOCK_128X128 || bs == BLOCK_128X64) {
      nhb = AOMMIN(MI_SIZE_128X128, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
      hb_step = 2;
    }
    if (bs == BLOCK_128X128 || bs == BLOCK_64X128) {
      nvb = AOMMIN(MI_SIZE_128X128, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
      vb_step = 2;
    }
  } else {
    bs = BLOCK_64X64;
  }
  // Get number of 8x8 blocks which are not skip. Cdef processing happens for
  // 8x8 blocks which are not skip.
  const int cdef_count = av1_cdef_compute_sb_list(
      mi_params, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, bs);

  // A top/left border is only copied when this is not the first row/column
  // of filter blocks.
  const int yoff = CDEF_VBORDER * (fbr != 0);
  const int xoff = CDEF_HBORDER * (fbc != 0);
  // dirinit, dir and var are shared by every av1_cdef_filter_fb() call below
  // (all planes and all strengths).
  int dirinit = 0;
  for (int pli = 0; pli < cdef_search_ctx->num_planes; pli++) {
    // Reset the whole input buffer before copying this plane's samples.
    for (int i = 0; i < CDEF_INBUF_SIZE; i++) inbuf[i] = CDEF_VERY_LARGE;
    /* We avoid filtering the pixels for which some of the pixels to
    average are outside the frame. We could change the filter instead,
    but it would add special cases for any future vectorization. */
    // Rows/columns to copy: the block itself, plus a bottom/right border when
    // another filter block follows, plus the top/left border (yoff/xoff).
    const int ysize = (nvb << mi_high_l2[pli]) +
                      CDEF_VBORDER * (fbr + vb_step < cdef_search_ctx->nvfb) +
                      yoff;
    const int xsize = (nhb << mi_wide_l2[pli]) +
                      CDEF_HBORDER * (fbc + hb_step < cdef_search_ctx->nhfb) +
                      xoff;
    // Position of the block's top-left sample within this plane.
    const int row = fbr * MI_SIZE_64X64 << mi_high_l2[pli];
    const int col = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
    struct macroblockd_plane pd = cdef_search_ctx->plane[pli];
    cdef_search_ctx->copy_fn(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE,
                             pd.dst.buf, row - yoff, col - xoff, pd.dst.stride,
                             ysize, xsize);
    // Try every strength of the search space on this plane.
    for (int gi = 0; gi < cdef_search_ctx->total_strengths; gi++) {
      int pri_strength, sec_strength;
      get_cdef_filter_strengths(cdef_search_ctx->pick_method, &pri_strength,
                                &sec_strength, gi);
      // A secondary strength of 3 is passed to the filter as 4.
      av1_cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in,
                         cdef_search_ctx->xdec[pli], cdef_search_ctx->ydec[pli],
                         dir, &dirinit, var, pli, dlist, cdef_count,
                         pri_strength, sec_strength + (sec_strength == 3),
                         cdef_search_ctx->damping, coeff_shift);
      const uint64_t curr_mse = cdef_search_ctx->compute_cdef_dist_fn(
          ref_buffer[pli], ref_stride[pli], tmp_dst, dlist, cdef_count,
          cdef_search_ctx->bsize[pli], coeff_shift, row, col);
      // Plane 0 goes to mse[0]; planes 1 and 2 are summed in mse[1].
      if (pli < 2)
        cdef_search_ctx->mse[pli][sb_count][gi] = curr_mse;
      else
        cdef_search_ctx->mse[1][sb_count][gi] += curr_mse;
    }
  }
  // Remember the mi grid offset of this block for the final strength
  // assignment.
  cdef_search_ctx->sb_index[sb_count] =
      MI_SIZE_64X64 * fbr * mi_params->mi_stride + MI_SIZE_64X64 * fbc;
}
382 
383 // MSE calculation at frame level.
384 // Inputs:
385 //   cdef_search_ctx: Pointer to the structure containing parameters related to
386 //   CDEF search context.
387 // Returns:
388 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
cdef_mse_calc_frame(CdefSearchCtx * cdef_search_ctx)389 static void cdef_mse_calc_frame(CdefSearchCtx *cdef_search_ctx) {
390   // Loop over each sb.
391   for (int fbr = 0; fbr < cdef_search_ctx->nvfb; ++fbr) {
392     for (int fbc = 0; fbc < cdef_search_ctx->nhfb; ++fbc) {
393       // Checks if cdef processing can be skipped for particular sb.
394       if (cdef_sb_skip(cdef_search_ctx->mi_params, fbr, fbc)) continue;
395       // Calculate mse for each sb and store the relevant sb index.
396       av1_cdef_mse_calc_block(cdef_search_ctx, fbr, fbc,
397                               cdef_search_ctx->sb_count);
398       cdef_search_ctx->sb_count++;
399     }
400   }
401 }
402 
403 // Allocates memory for members of CdefSearchCtx.
404 // Inputs:
405 //   cdef_search_ctx: Pointer to the structure containing parameters
406 //   related to CDEF search context.
407 // Returns:
408 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
cdef_alloc_data(CdefSearchCtx * cdef_search_ctx)409 static AOM_INLINE void cdef_alloc_data(CdefSearchCtx *cdef_search_ctx) {
410   const int nvfb = cdef_search_ctx->nvfb;
411   const int nhfb = cdef_search_ctx->nhfb;
412   cdef_search_ctx->sb_index =
413       aom_malloc(nvfb * nhfb * sizeof(cdef_search_ctx->sb_index));
414   cdef_search_ctx->sb_count = 0;
415   cdef_search_ctx->mse[0] =
416       aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb);
417   cdef_search_ctx->mse[1] =
418       aom_malloc(sizeof(**cdef_search_ctx->mse) * nvfb * nhfb);
419 }
420 
421 // Deallocates the memory allocated for members of CdefSearchCtx.
422 // Inputs:
423 //   cdef_search_ctx: Pointer to the structure containing parameters
424 //   related to CDEF search context.
425 // Returns:
426 //   Nothing will be returned.
cdef_dealloc_data(CdefSearchCtx * cdef_search_ctx)427 static AOM_INLINE void cdef_dealloc_data(CdefSearchCtx *cdef_search_ctx) {
428   aom_free(cdef_search_ctx->mse[0]);
429   aom_free(cdef_search_ctx->mse[1]);
430   aom_free(cdef_search_ctx->sb_index);
431 }
432 
433 // Initialize the parameters related to CDEF search context.
434 // Inputs:
435 //   frame: Pointer to compressed frame buffer
436 //   ref: Pointer to the frame buffer holding the source frame
437 //   cm: Pointer to top level common structure
438 //   xd: Pointer to common current coding block structure
439 //   cdef_search_ctx: Pointer to the structure containing parameters related to
440 //   CDEF search context.
441 //   pick_method: Search method used to select CDEF parameters
442 // Returns:
443 //   Nothing will be returned. Contents of cdef_search_ctx will be modified.
cdef_params_init(const YV12_BUFFER_CONFIG * frame,const YV12_BUFFER_CONFIG * ref,AV1_COMMON * cm,MACROBLOCKD * xd,CdefSearchCtx * cdef_search_ctx,CDEF_PICK_METHOD pick_method)444 static AOM_INLINE void cdef_params_init(const YV12_BUFFER_CONFIG *frame,
445                                         const YV12_BUFFER_CONFIG *ref,
446                                         AV1_COMMON *cm, MACROBLOCKD *xd,
447                                         CdefSearchCtx *cdef_search_ctx,
448                                         CDEF_PICK_METHOD pick_method) {
449   const CommonModeInfoParams *const mi_params = &cm->mi_params;
450   const int num_planes = av1_num_planes(cm);
451   cdef_search_ctx->mi_params = &cm->mi_params;
452   cdef_search_ctx->ref = ref;
453   cdef_search_ctx->nvfb =
454       (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
455   cdef_search_ctx->nhfb =
456       (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
457   cdef_search_ctx->coeff_shift = AOMMAX(cm->seq_params.bit_depth - 8, 0);
458   cdef_search_ctx->damping = 3 + (cm->quant_params.base_qindex >> 6);
459   cdef_search_ctx->total_strengths = nb_cdef_strengths[pick_method];
460   cdef_search_ctx->num_planes = num_planes;
461   cdef_search_ctx->pick_method = pick_method;
462   cdef_search_ctx->sb_count = 0;
463   av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0,
464                        num_planes);
465   // Initialize plane wise information.
466   for (int pli = 0; pli < num_planes; pli++) {
467     cdef_search_ctx->xdec[pli] = xd->plane[pli].subsampling_x;
468     cdef_search_ctx->ydec[pli] = xd->plane[pli].subsampling_y;
469     cdef_search_ctx->bsize[pli] =
470         cdef_search_ctx->ydec[pli]
471             ? (cdef_search_ctx->xdec[pli] ? BLOCK_4X4 : BLOCK_8X4)
472             : (cdef_search_ctx->xdec[pli] ? BLOCK_4X8 : BLOCK_8X8);
473     cdef_search_ctx->mi_wide_l2[pli] =
474         MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
475     cdef_search_ctx->mi_high_l2[pli] =
476         MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
477     cdef_search_ctx->plane[pli] = xd->plane[pli];
478   }
479   // Function pointer initialization.
480 #if CONFIG_AV1_HIGHBITDEPTH
481   if (cm->seq_params.use_highbitdepth) {
482     cdef_search_ctx->copy_fn = copy_sb16_16_highbd;
483     cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist_highbd;
484   } else {
485     cdef_search_ctx->copy_fn = copy_sb16_16;
486     cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
487   }
488 #else
489   cdef_search_ctx->copy_fn = copy_sb16_16;
490   cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist;
491 #endif
492 }
493 
// Chooses the frame's CDEF parameters from the quantizer alone, without any
// search: a single strength pair is predicted from quadratic functions of q
// (one set of coefficients for intra-only frames, another for the rest),
// cdef_bits is set to 0 and the cdef_strength of the first mi unit of every
// 64x64 block is set to 0.
// Inputs:
//   cm: Pointer to top level common structure
// Returns:
//   Nothing will be returned. cm->cdef_info and the mode info grid are
//   modified.
static void pick_cdef_from_qp(AV1_COMMON *const cm) {
  const int bit_depth = cm->seq_params.bit_depth;
  const int base_qindex = cm->quant_params.base_qindex;
  // AC quantizer step, shifted down by (bit_depth - 8).
  const int q = av1_ac_quant_QTX(base_qindex, 0, bit_depth) >> (bit_depth - 8);

  CdefInfo *const cdef_info = &cm->cdef_info;
  cdef_info->cdef_bits = 0;
  cdef_info->nb_cdef_strengths = 1;
  cdef_info->cdef_damping = 3 + (base_qindex >> 6);

  int y_pri = 0;
  int y_sec = 0;
  int uv_pri = 0;
  int uv_sec = 0;
  aom_clear_system_state();
  // Primary strengths are clamped to [0, 15], secondary ones to [0, 3].
  if (frame_is_intra_only(cm)) {
    y_pri = clamp(
        (int)roundf(q * q * 0.0000033731974f + q * 0.008070594f + 0.0187634f),
        0, 15);
    y_sec = clamp(
        (int)roundf(q * q * 0.0000029167343f + q * 0.0027798624f + 0.0079405f),
        0, 3);
    uv_pri = clamp(
        (int)roundf(q * q * -0.0000130790995f + q * 0.012892405f - 0.00748388f),
        0, 15);
    uv_sec = clamp((int)roundf(q * q * 0.0000032651783f +
                               q * 0.00035520183f + 0.00228092f),
                   0, 3);
  } else {
    y_pri = clamp((int)roundf(q * q * -0.0000023593946f + q * 0.0068615186f +
                              0.02709886f),
                  0, 15);
    y_sec = clamp((int)roundf(q * q * -0.00000057629734f + q * 0.0013993345f +
                              0.03831067f),
                  0, 3);
    uv_pri = clamp((int)roundf(q * q * -0.0000007095069f + q * 0.0034628846f +
                               0.00887099f),
                   0, 15);
    uv_sec = clamp((int)roundf(q * q * 0.00000023874085f +
                               q * 0.00028223585f + 0.05576307f),
                   0, 3);
  }
  cdef_info->cdef_strengths[0] = y_pri * CDEF_SEC_STRENGTHS + y_sec;
  cdef_info->cdef_uv_strengths[0] = uv_pri * CDEF_SEC_STRENGTHS + uv_sec;

  // Every 64x64 block selects strength index 0, the only one signalled.
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  const int row_step = MI_SIZE_64X64 * mi_params->mi_stride;
  MB_MODE_INFO **row_base = mi_params->mi_grid_base;
  for (int r = 0; r < nvfb; ++r, row_base += row_step) {
    for (int c = 0; c < nhfb; ++c) {
      row_base[MI_SIZE_64X64 * c]->cdef_strength = 0;
    }
  }
}
551 
// Selects the CDEF parameters of a frame.
// With CDEF_PICK_FROM_Q the strengths are derived from the quantizer only.
// Otherwise the mse of every strength is measured for every non-skipped 64x64
// block, the set of 1, 2, 4 or 8 frame-level strengths with the lowest rd cost
// is chosen, and each block is assigned the index of its best strength.
// Inputs:
//   mt_info: Multi-thread info; more than one worker selects the
//   multi-threaded mse calculation
//   frame: Pointer to compressed frame buffer
//   ref: Pointer to the frame buffer holding the source frame
//   cm: Pointer to top level common structure
//   xd: Pointer to common current coding block structure
//   pick_method: Search method used to select CDEF parameters
//   rdmult: Multiplier passed to RDCOST() when weighing rate against distortion
// Returns:
//   Nothing will be returned. cm->cdef_info and the cdef_strength of the
//   processed blocks in the mode info grid are modified.
void av1_cdef_search(MultiThreadInfo *mt_info, const YV12_BUFFER_CONFIG *frame,
                     const YV12_BUFFER_CONFIG *ref, AV1_COMMON *cm,
                     MACROBLOCKD *xd, CDEF_PICK_METHOD pick_method,
                     int rdmult) {
  // No search at all for this method.
  if (pick_method == CDEF_PICK_FROM_Q) {
    pick_cdef_from_qp(cm);
    return;
  }
  const CommonModeInfoParams *const mi_params = &cm->mi_params;
  const int damping = 3 + (cm->quant_params.base_qindex >> 6);
  const int fast = (pick_method >= CDEF_FAST_SEARCH_LVL1 &&
                    pick_method <= CDEF_FAST_SEARCH_LVL4);
  const int num_planes = av1_num_planes(cm);
  CdefSearchCtx cdef_search_ctx;
  // Initialize parameters related to CDEF search context.
  cdef_params_init(frame, ref, cm, xd, &cdef_search_ctx, pick_method);
  // Allocate CDEF search context buffers.
  cdef_alloc_data(&cdef_search_ctx);
  // Frame level mse calculation.
  if (mt_info->num_workers > 1) {
    av1_cdef_mse_calc_frame_mt(cm, mt_info, &cdef_search_ctx);
  } else {
    cdef_mse_calc_frame(&cdef_search_ctx);
  }

  /* Search for different number of signaling bits. */
  int nb_strength_bits = 0;
  uint64_t best_rd = UINT64_MAX;
  CdefInfo *const cdef_info = &cm->cdef_info;
  int sb_count = cdef_search_ctx.sb_count;
  // Local view of the two mse tables, in the form the search helpers expect.
  uint64_t(*mse[2])[TOTAL_STRENGTHS];
  mse[0] = cdef_search_ctx.mse[0];
  mse[1] = cdef_search_ctx.mse[1];
  // i is the number of bits per block; it allows 1 << i strengths per frame.
  for (int i = 0; i <= 3; i++) {
    int best_lev0[CDEF_MAX_STRENGTHS];
    int best_lev1[CDEF_MAX_STRENGTHS] = { 0 };
    const int nb_strengths = 1 << i;
    uint64_t tot_mse;
    // With more than one plane the two tables are searched jointly.
    if (num_planes > 1) {
      tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths,
                                           mse, sb_count, pick_method);
    } else {
      tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count,
                                      pick_method);
    }

    // Rate: i bits for every block, plus CDEF_STRENGTH_BITS for every
    // strength in the frame-level list (two lists with more than one plane).
    const int total_bits = sb_count * i + nb_strengths * CDEF_STRENGTH_BITS *
                                              (num_planes > 1 ? 2 : 1);
    const int rate_cost = av1_cost_literal(total_bits);
    // Distortion: the total mse scaled by 16.
    const uint64_t dist = tot_mse * 16;
    const uint64_t rd = RDCOST(rdmult, rate_cost, dist);
    if (rd < best_rd) {
      best_rd = rd;
      nb_strength_bits = i;
      memcpy(cdef_info->cdef_strengths, best_lev0,
             nb_strengths * sizeof(best_lev0[0]));
      if (num_planes > 1) {
        memcpy(cdef_info->cdef_uv_strengths, best_lev1,
               nb_strengths * sizeof(best_lev1[0]));
      }
    }
  }

  cdef_info->cdef_bits = nb_strength_bits;
  cdef_info->nb_cdef_strengths = 1 << nb_strength_bits;
  // Give every processed block the index of the chosen strength with the
  // lowest mse for that block.
  for (int i = 0; i < sb_count; i++) {
    uint64_t best_mse = UINT64_MAX;
    int best_gi = 0;
    for (int gi = 0; gi < cdef_info->nb_cdef_strengths; gi++) {
      uint64_t curr = mse[0][i][cdef_info->cdef_strengths[gi]];
      if (num_planes > 1) curr += mse[1][i][cdef_info->cdef_uv_strengths[gi]];
      if (curr < best_mse) {
        best_gi = gi;
        best_mse = curr;
      }
    }
    mi_params->mi_grid_base[cdef_search_ctx.sb_index[i]]->cdef_strength =
        best_gi;
  }

  // Up to this point the fast methods hold search-space indices; convert them
  // to pri_strength * CDEF_SEC_STRENGTHS + sec_strength.
  // NOTE(review): cdef_uv_strengths[] is converted even when num_planes == 1,
  // in which case this function did not write it above - confirm its contents
  // are always valid search-space indices in that case.
  if (fast) {
    for (int j = 0; j < cdef_info->nb_cdef_strengths; j++) {
      const int luma_strength = cdef_info->cdef_strengths[j];
      const int chroma_strength = cdef_info->cdef_uv_strengths[j];
      // Written by the STORE_CDEF_FILTER_STRENGTH macro below.
      int pri_strength, sec_strength;

      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_strengths[j], pick_method,
                                 luma_strength);
      STORE_CDEF_FILTER_STRENGTH(cdef_info->cdef_uv_strengths[j], pick_method,
                                 chroma_strength);
    }
  }

  cdef_info->cdef_damping = damping;
  // Deallocate CDEF search context buffers.
  cdef_dealloc_data(&cdef_search_ctx);
}
649