1 /*
2 * Copyright(c) 2019 Intel Corporation
3 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
4 *
5 * This source code is subject to the terms of the BSD 2 Clause License and
6 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
7 * was not distributed with this source code in the LICENSE file, you can
8 * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
9 * Media Patent License 1.0 was not distributed with this source code in the
10 * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
11 */
12 
13 #include <string.h>
14 
15 #include "EbDeblockingFilter.h"
16 #include "EbDefinitions.h"
17 #include "EbUtility.h"
18 #include "EbPictureControlSet.h"
19 #include "EbCodingUnit.h"
20 #include "EbSequenceControlSet.h"
21 #include "EbReferenceObject.h"
22 #include "EbCommonUtils.h"
23 //#include "EbLog.h"
24 
/*
 * Prepares the frame-level loop-filter state stored in the parent picture control set.
 *
 * - forces combined vertical/horizontal filtering (combine_vert_horz_lf = 1),
 * - initialises the limits for the current sharpness level through update_sharpness(),
 * - fills the hev threshold vector of every filter level with (level >> 4).
 */
void svt_av1_loop_filter_init(PictureControlSet *pcs_ptr) {
    struct LoopFilter *const loop_filter = &pcs_ptr->parent_pcs_ptr->frm_hdr.loop_filter_params;
    LoopFilterInfoN *const   filter_info = &pcs_ptr->parent_pcs_ptr->lf_info;

    loop_filter->combine_vert_horz_lf = 1;

    // limits depend on the sharpness level only
    update_sharpness(filter_info, loop_filter->sharpness_level);

    // one constant vector of SIMD_WIDTH bytes per filter level
    for (int32_t level = 0; level <= MAX_LOOP_FILTER; ++level) {
        memset(filter_info->lfthr[level].hev_thr, (level >> 4), SIMD_WIDTH);
    }
}
40 
41 //***************************************************************************************************//
42 
/*
 * Returns the linear offset of position (x_offset, y_offset) inside a buffer whose rows are
 * `stride` elements apart.
 *
 * No scale factors are applied: the coordinates are used exactly as given.
 */
static INLINE int32_t scaled_buffer_offset(int32_t x_offset, int32_t y_offset, int32_t stride) {
    return x_offset + y_offset * stride;
}
setup_pred_plane(struct Buf2D * dst,BlockSize bsize,uint8_t * src,int32_t width,int32_t height,int32_t stride,int32_t mi_row,int32_t mi_col,int32_t subsampling_x,int32_t subsampling_y,int32_t is_16bit)51 static INLINE void setup_pred_plane(struct Buf2D *dst, BlockSize bsize, uint8_t *src, int32_t width,
52                                     int32_t height, int32_t stride, int32_t mi_row, int32_t mi_col,
53                                     /*const struct scale_factors *scale,*/
54                                     int32_t subsampling_x, int32_t subsampling_y,
55                                     int32_t is_16bit) {
56     // Offset the buffer pointer
57     if (subsampling_y && (mi_row & 0x01) && (mi_size_high[bsize] == 1))
58         mi_row -= 1;
59     if (subsampling_x && (mi_col & 0x01) && (mi_size_wide[bsize] == 1))
60         mi_col -= 1;
61 
62     const int32_t x = (MI_SIZE * mi_col) >> subsampling_x;
63     const int32_t y = (MI_SIZE * mi_row) >> subsampling_y;
64     dst->buf        = src + (scaled_buffer_offset(x, y, stride /*, scale*/) << is_16bit);
65     dst->buf0       = src;
66     dst->width      = width;
67     dst->height     = height;
68     dst->stride     = stride;
69 }
/*
 * Points the destination buffers of planes [plane_start, min(plane_end, 3)) at the block
 * located at (mi_row, mi_col) inside picture `src`.
 *
 * For every selected plane the start address is the picture buffer advanced past the
 * origin_x/origin_y border; the chroma planes use half of the luma offset and half of the
 * picture width and height. Each entry's own subsampling_x, subsampling_y and is_16bit
 * fields are read, so they must be set by the caller beforehand.
 */
void svt_av1_setup_dst_planes(struct MacroblockdPlane *planes, BlockSize bsize,
                              const EbPictureBufferDesc *src, int32_t mi_row, int32_t mi_col,
                              const int32_t plane_start, const int32_t plane_end) {
    // Clamp the upper bound to the three supported planes (also quiets static analysis).
    const int32_t plane_limit = AOMMIN(plane_end, 3);

    for (int32_t plane = plane_start; plane < plane_limit; ++plane) {
        struct MacroblockdPlane *pd;
        uint8_t *                plane_origin;
        int32_t                  plane_width;
        int32_t                  plane_height;
        int32_t                  plane_stride;

        switch (plane) {
        case 0:
            pd           = &planes[0];
            plane_origin = &src->buffer_y[(src->origin_x + src->origin_y * src->stride_y)
                                          << pd->is_16bit];
            plane_width  = src->width;
            plane_height = src->height;
            plane_stride = src->stride_y;
            break;
        case 1:
            pd           = &planes[1];
            plane_origin = &src->buffer_cb[((src->origin_x + src->origin_y * src->stride_cb)
                                            << pd->is_16bit) /
                                           2];
            plane_width  = src->width / 2;
            plane_height = src->height / 2;
            plane_stride = src->stride_cb;
            break;
        case 2:
            pd           = &planes[2];
            plane_origin = &src->buffer_cr[((src->origin_x + src->origin_y * src->stride_cr)
                                            << pd->is_16bit) /
                                           2];
            plane_width  = src->width / 2;
            plane_height = src->height / 2;
            plane_stride = src->stride_cr;
            break;
        default:
            // indices outside 0..2 are ignored
            continue;
        }

        setup_pred_plane(&pd->dst,
                         bsize,
                         plane_origin,
                         plane_width,
                         plane_height,
                         plane_stride,
                         mi_row,
                         mi_col,
                         pd->subsampling_x,
                         pd->subsampling_y,
                         pd->is_16bit);
    }
}
131 
132 //***************************************************************************************************//
get_transform_size(const MbModeInfo * const mbmi,const EdgeDir edge_dir,const int32_t plane,const struct MacroblockdPlane * plane_ptr,const EbBool is_skip)133 static INLINE TxSize get_transform_size(const MbModeInfo *const mbmi, const EdgeDir edge_dir,
134     const int32_t plane, const struct MacroblockdPlane *plane_ptr, const EbBool is_skip) {
135     assert(mbmi != NULL);
136 
137     TxSize tx_size = (plane == COMPONENT_LUMA)
138         ? (is_skip
139             ? tx_depth_to_tx_size[0][mbmi->block_mi.sb_type]
140             : tx_depth_to_tx_size[mbmi->tx_depth][mbmi->block_mi.sb_type]) // use max_tx_size
141         : av1_get_max_uv_txsize(mbmi->block_mi.sb_type, plane_ptr->subsampling_x, plane_ptr->subsampling_y);
142     assert(tx_size < TX_SIZES_ALL);
143 
144     // since in case of chrominance or non-square transorm need to convert
145     // transform size into transform size in particular direction.
146     // for vertical edge, filter direction is horizontal, for horizontal
147     // edge, filter direction is vertical.
148     return (VERT_EDGE == edge_dir) ? txsize_horz_map[tx_size] : txsize_vert_map[tx_size];
149 }
150 
151 // Return TxSize from get_transform_size(), so it is plane and direction
152 // awared
set_lpf_parameters(Av1DeblockingParameters * const params,const uint64_t mode_step,const PictureControlSet * const pcs_ptr,const EdgeDir edge_dir,const uint32_t x,const uint32_t y,const int32_t plane,const struct MacroblockdPlane * const plane_ptr)153 static TxSize set_lpf_parameters(Av1DeblockingParameters *const params, const uint64_t mode_step,
154                                  const PictureControlSet *const pcs_ptr,
155                                  const EdgeDir edge_dir, const uint32_t x,
156                                  const uint32_t y, const int32_t plane,
157                                  const struct MacroblockdPlane *const plane_ptr) {
158     FrameHeader *          frm_hdr = &pcs_ptr->parent_pcs_ptr->frm_hdr;
159     const LoopFilterInfoN *lfi_n   = &pcs_ptr->parent_pcs_ptr->lf_info;
160 
161     // reset to initial values
162     params->filter_length = 0;
163 
164     // no deblocking is required
165     const uint32_t width  = plane_ptr->dst.width;
166     const uint32_t height = plane_ptr->dst.height;
167     if ((width <= x) || (height <= y)) {
168         // just return the smallest transform unit size
169         return TX_4X4;
170     }
171 
172     const uint32_t scale_horz = plane_ptr->subsampling_x;
173     const uint32_t scale_vert = plane_ptr->subsampling_y;
174     // for sub8x8 block, chroma prediction mode is obtained from the bottom/right
175     // mi structure of the co-located 8x8 luma block. so for chroma plane, mi_row
176     // and mi_col should map to the bottom/right mi structure, i.e, both mi_row
177     // and mi_col should be odd number for chroma plane.
178 
179     const int32_t mi_row    = scale_vert | ((y << scale_vert) >> MI_SIZE_LOG2);
180     const int32_t mi_col    = scale_horz | ((x << scale_horz) >> MI_SIZE_LOG2);
181     uint32_t      mi_stride = pcs_ptr->mi_stride;
182     const int32_t offset    = mi_row * mi_stride + mi_col;
183     ModeInfo **   mi        = (pcs_ptr->mi_grid_base + offset);
184     //MbModeInfo **mi = cm->mi_grid_visible + mi_row * cm->mi_stride + mi_col;
185     const MbModeInfo *mbmi = &mi[0]->mbmi;
186 
187     // If current mbmi is not correctly setup, return an invalid value to stop
188     // filtering. One example is that if this tile is not coded, then its mbmi
189     // it not set up.
190     if (mbmi == NULL)
191         return TX_INVALID;
192     const int32_t curr_skipped = mbmi->block_mi.skip &&
193         is_inter_block_no_intrabc(mbmi->block_mi.ref_frame[0]);
194     const TxSize ts = get_transform_size(mbmi, edge_dir, plane, plane_ptr, curr_skipped);
195     assert(ts < TX_SIZES_ALL);
196 
197     {
198         const uint32_t coord           = (VERT_EDGE == edge_dir) ? (x) : (y);
199         const uint32_t transform_masks = edge_dir == VERT_EDGE ? tx_size_wide[ts] - 1
200                                                                : tx_size_high[ts] - 1;
201         const int32_t  txb_edge        = (coord & transform_masks) ? (0) : (1);
202 
203         if (!txb_edge)
204             return ts;
205 
206         // prepare outer edge parameters. deblock the edge if it's an edge of a TU
207         {
208             uint32_t       curr_level; // Added to address 4x4 problem
209             PredictionMode mode = (mbmi->block_mi.mode == INTRA_MODE_4x4) ? DC_PRED
210                                                                           : mbmi->block_mi.mode;
211             if (frm_hdr->delta_lf_params.delta_lf_present) {
212                 curr_level = get_filter_level_delta_lf(frm_hdr,
213                                                        edge_dir,
214                                                        plane,
215                                                        pcs_ptr->parent_pcs_ptr->curr_delta_lf,
216                                                        0 /*segment_id*/,
217                                                        mode,
218                                                        mbmi->block_mi.ref_frame[0]);
219             } else {
220                 assert(mode < 25);
221                 curr_level = lfi_n->lvl[plane][0 /*segment_id*/][edge_dir]
222                                        [mbmi->block_mi.ref_frame[0]][mode_lf_lut[mode]];
223             }
224 
225             uint32_t level = curr_level;
226             if (coord) {
227                 //const ModeInfo *const mi_prev = *(mi - mode_step);
228                 const ModeInfo *const   mi_prev_temp = *(mi - mode_step);
229                 const MbModeInfo *const mi_prev      = &mi_prev_temp[0].mbmi;
230                 //
231                 if (mi_prev == NULL)
232                     return TX_INVALID;
233                 const int32_t pv_skip = mi_prev->block_mi.skip &&
234                     is_inter_block_no_intrabc(mi_prev->block_mi.ref_frame[0]);
235                 const TxSize  pv_ts = get_transform_size(mi_prev, edge_dir, plane, plane_ptr, pv_skip);
236                 uint32_t pv_lvl;
237                 mode = (mi_prev->block_mi.mode == INTRA_MODE_4x4) ? DC_PRED
238                                                                   : mi_prev->block_mi.mode;
239                 if (frm_hdr->delta_lf_params.delta_lf_present) {
240                     pv_lvl = get_filter_level_delta_lf(frm_hdr,
241                                                        edge_dir,
242                                                        plane,
243                                                        pcs_ptr->parent_pcs_ptr->curr_delta_lf,
244                                                        0 /*segment_id*/,
245                                                        mi_prev->block_mi.mode,
246                                                        mi_prev->block_mi.ref_frame[0]);
247                 } else {
248                     assert(mode < 25);
249                     pv_lvl = lfi_n->lvl[plane][0 /*segment_id*/][edge_dir]
250                                        [mi_prev->block_mi.ref_frame[0]][mode_lf_lut[mode]];
251                 }
252 
253                 const BlockSize bsize = get_plane_block_size(
254                     mbmi->block_mi.sb_type, plane_ptr->subsampling_x, plane_ptr->subsampling_y);
255                 assert(bsize < BlockSizeS_ALL);
256                 const int32_t prediction_masks = (edge_dir == VERT_EDGE)
257                     ? block_size_wide[bsize] - 1
258                     : block_size_high[bsize] - 1;
259                 const int32_t pu_edge          = !(coord & prediction_masks);
260                 // if the current and the previous blocks are skipped,
261                 // deblock the edge if the edge belongs to a PU's edge only.
262                 if ((curr_level || pv_lvl) && (!pv_skip || !curr_skipped || pu_edge)) {
263                     const TxSize min_ts = AOMMIN(ts, pv_ts);
264                     if (TX_4X4 >= min_ts)
265                         params->filter_length = 4;
266                     else
267                         params->filter_length = (plane != 0) ? 6 : (TX_8X8 == min_ts) ? 8 : 14;
268                     // update the level if the current block is skipped,
269                     // but the previous one is not
270                     level = (curr_level) ? (curr_level) : (pv_lvl);
271                 }
272             }
273             // prepare common parameters
274             if (params->filter_length) {
275                 const LoopFilterThresh *const limits = pcs_ptr->parent_pcs_ptr->lf_info.lfthr +
276                     level;
277                 params->lim     = limits->lim;
278                 params->mblim   = limits->mblim;
279                 params->hev_thr = limits->hev_thr;
280             }
281         }
282     }
283 
284     return ts;
285 }
286 
svt_av1_filter_block_plane_vert(const PictureControlSet * const pcs_ptr,const int32_t plane,const MacroblockdPlane * const plane_ptr,const uint32_t mi_row,const uint32_t mi_col)287 void svt_av1_filter_block_plane_vert(const PictureControlSet *const pcs_ptr,
288                                      const int32_t plane,
289                                      const MacroblockdPlane *const plane_ptr, const uint32_t mi_row,
290                                      const uint32_t mi_col) {
291     SequenceControlSet *scs_ptr = (SequenceControlSet *)
292                                       pcs_ptr->parent_pcs_ptr->scs_wrapper_ptr->object_ptr;
293     EbBool is_16bit = scs_ptr->static_config.encoder_bit_depth > 8;
294     // TODO
295     // when loop_filter_mode = 1, dblk is processed in encdec
296     // 16 bit dblk for loop_filter_mode = 1 needs to enabled after 16bit encdec is done
297     if (scs_ptr->static_config.is_16bit_pipeline)
298         is_16bit = EB_TRUE;
299     const int32_t  row_step   = MI_SIZE >> MI_SIZE_LOG2;
300     const uint32_t scale_horz = plane_ptr->subsampling_x;
301     const uint32_t scale_vert = plane_ptr->subsampling_y;
302     uint8_t *const dst_ptr    = plane_ptr->dst.buf;
303     const int32_t  dst_stride = plane_ptr->dst.stride;
304     int32_t y_range = scs_ptr->seq_header.sb_size == BLOCK_128X128 ? (MAX_MIB_SIZE >> scale_vert)
305                                                                    : (SB64_MIB_SIZE >> scale_vert);
306     int32_t x_range = scs_ptr->seq_header.sb_size == BLOCK_128X128 ? (MAX_MIB_SIZE >> scale_horz)
307                                                                    : (SB64_MIB_SIZE >> scale_horz);
308     const uint32_t sb_size = (scs_ptr->seq_header.sb_size == BLOCK_128X128) ? 128 : 64;
309     if (mi_row == (scs_ptr->max_input_luma_height / sb_size * sb_size) >> MI_SIZE_LOG2) {
310         y_range = (((scs_ptr->max_input_luma_height - scs_ptr->max_input_pad_bottom) % sb_size) +
311                    MI_SIZE - 1) >>
312             MI_SIZE_LOG2;
313         if (plane) {
314             y_range = ((((scs_ptr->max_input_luma_height - scs_ptr->max_input_pad_bottom) %
315                          sb_size) >>
316                         scale_vert) +
317                        MI_SIZE - 1) >>
318                 MI_SIZE_LOG2;
319         }
320     }
321 
322     if (mi_col == (scs_ptr->max_input_luma_width / sb_size * sb_size) >> MI_SIZE_LOG2) {
323         x_range = (((scs_ptr->max_input_luma_width - scs_ptr->max_input_pad_right) % sb_size) +
324                    MI_SIZE - 1) >>
325             MI_SIZE_LOG2;
326         if (plane) {
327             x_range = ((((scs_ptr->max_input_luma_width - scs_ptr->max_input_pad_right) %
328                          sb_size) >>
329                         scale_horz) +
330                        MI_SIZE - 1) >>
331                 MI_SIZE_LOG2;
332         }
333     }
334 
335     for (int32_t y = 0; y < y_range; y += row_step) {
336         uint8_t *p = dst_ptr + ((y * MI_SIZE * dst_stride) << plane_ptr->is_16bit);
337         for (int32_t x = 0; x < x_range;) {
338             // inner loop always filter vertical edges in a MI block. If MI size
339             // is 8x8, it will filter the vertical edge aligned with a 8x8 block.
340             // If 4x4 trasnform is used, it will then filter the internal edge
341             //  aligned with a 4x4 block
342             const uint32_t          curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
343             const uint32_t          curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
344             uint32_t                advance_units;
345             TxSize                  tx_size;
346             Av1DeblockingParameters params;
347             memset(&params, 0, sizeof(params));
348 
349             tx_size = set_lpf_parameters(&params,
350                                          ((uint64_t)1 << scale_horz),
351                                          pcs_ptr,
352                                          VERT_EDGE,
353                                          curr_x,
354                                          curr_y,
355                                          plane,
356                                          plane_ptr);
357             if (tx_size == TX_INVALID) {
358                 params.filter_length = 0;
359                 tx_size              = TX_4X4;
360             }
361 
362             switch (params.filter_length) {
363                 // apply 4-tap filtering
364             case 4:
365                 if (is_16bit) {
366                     svt_aom_highbd_lpf_vertical_4((uint16_t *)(p), //CONVERT_TO_SHORTPTR(p),
367                                                   dst_stride,
368                                                   params.mblim,
369                                                   params.lim,
370                                                   params.hev_thr,
371                                                   scs_ptr->static_config.encoder_bit_depth);
372                 } else {
373                     svt_aom_lpf_vertical_4(p, dst_stride, params.mblim, params.lim, params.hev_thr);
374                 }
375                 break;
376             case 6: // apply 6-tap filter for chroma plane only
377                 assert(plane != 0);
378                 if (is_16bit) {
379                     svt_aom_highbd_lpf_vertical_6((uint16_t *)(p), //CONVERT_TO_SHORTPTR(p),
380                                                   dst_stride,
381                                                   params.mblim,
382                                                   params.lim,
383                                                   params.hev_thr,
384                                                   scs_ptr->static_config.encoder_bit_depth);
385                 } else {
386                     svt_aom_lpf_vertical_6(p, dst_stride, params.mblim, params.lim, params.hev_thr);
387                 }
388                 break;
389                 // apply 8-tap filtering
390             case 8:
391                 if (is_16bit) {
392                     svt_aom_highbd_lpf_vertical_8((uint16_t *)(p), //CONVERT_TO_SHORTPTR(p),
393                                                   dst_stride,
394                                                   params.mblim,
395                                                   params.lim,
396                                                   params.hev_thr,
397                                                   scs_ptr->static_config.encoder_bit_depth);
398                 } else {
399                     svt_aom_lpf_vertical_8(p, dst_stride, params.mblim, params.lim, params.hev_thr);
400                 }
401                 break;
402                 // apply 14-tap filtering
403             case 14:
404                 if (is_16bit) {
405                     svt_aom_highbd_lpf_vertical_14((uint16_t *)(p), //CONVERT_TO_SHORTPTR(p),
406                                                    dst_stride,
407                                                    params.mblim,
408                                                    params.lim,
409                                                    params.hev_thr,
410                                                    scs_ptr->static_config.encoder_bit_depth);
411                 } else {
412                     svt_aom_lpf_vertical_14(
413                         p, dst_stride, params.mblim, params.lim, params.hev_thr);
414                 }
415                 break;
416                 // no filtering
417             default: break;
418             }
419             // advance the destination pointer
420             assert(tx_size < TX_SIZES_ALL);
421             advance_units = tx_size_wide_unit[tx_size];
422             x += advance_units;
423             p += ((advance_units * MI_SIZE) << plane_ptr->is_16bit);
424         }
425     }
426 }
427 
svt_av1_filter_block_plane_horz(const PictureControlSet * const pcs_ptr,const int32_t plane,const MacroblockdPlane * const plane_ptr,const uint32_t mi_row,const uint32_t mi_col)428 void svt_av1_filter_block_plane_horz(const PictureControlSet *const pcs_ptr,
429                                      const int32_t plane,
430                                      const MacroblockdPlane *const plane_ptr, const uint32_t mi_row,
431                                      const uint32_t mi_col) {
432     SequenceControlSet *scs_ptr = (SequenceControlSet *)
433                                       pcs_ptr->parent_pcs_ptr->scs_wrapper_ptr->object_ptr;
434     EbBool is_16bit = scs_ptr->static_config.encoder_bit_depth > 8;
435     // when loop_filter_mode = 1, dblk is processed in encdec
436     // 16 bit dblk for loop_filter_mode = 1 needs to enabled after 16bit encdec is done
437     if (scs_ptr->static_config.is_16bit_pipeline)
438         is_16bit = EB_TRUE;
439     const int32_t  col_step   = MI_SIZE >> MI_SIZE_LOG2;
440     const uint32_t scale_horz = plane_ptr->subsampling_x;
441     const uint32_t scale_vert = plane_ptr->subsampling_y;
442     uint8_t *const dst_ptr    = plane_ptr->dst.buf;
443     const int32_t  dst_stride = plane_ptr->dst.stride;
444     int32_t  y_range   = scs_ptr->seq_header.sb_size == BLOCK_128X128 ? (MAX_MIB_SIZE >> scale_vert)
445                                                                       : (SB64_MIB_SIZE >> scale_vert);
446     int32_t  x_range   = scs_ptr->seq_header.sb_size == BLOCK_128X128 ? (MAX_MIB_SIZE >> scale_horz)
447                                                                       : (SB64_MIB_SIZE >> scale_horz);
448     uint32_t mi_stride = pcs_ptr->mi_stride;
449 
450     const uint32_t sb_size = (scs_ptr->seq_header.sb_size == BLOCK_128X128) ? 128 : 64;
451     if (mi_row == (scs_ptr->max_input_luma_height / sb_size * sb_size) >> MI_SIZE_LOG2) {
452         y_range = (((scs_ptr->max_input_luma_height - scs_ptr->max_input_pad_bottom) % sb_size) +
453                    MI_SIZE - 1) >>
454             MI_SIZE_LOG2;
455         if (plane) {
456             y_range = ((((scs_ptr->max_input_luma_height - scs_ptr->max_input_pad_bottom) %
457                          sb_size) >>
458                         scale_vert) +
459                        MI_SIZE - 1) >>
460                 MI_SIZE_LOG2;
461         }
462     }
463 
464     if (mi_col == (scs_ptr->max_input_luma_width / sb_size * sb_size) >> MI_SIZE_LOG2) {
465         x_range = (((scs_ptr->max_input_luma_width - scs_ptr->max_input_pad_right) % sb_size) +
466                    MI_SIZE - 1) >>
467             MI_SIZE_LOG2;
468         if (plane) {
469             x_range = ((((scs_ptr->max_input_luma_width - scs_ptr->max_input_pad_right) %
470                          sb_size) >>
471                         scale_horz) +
472                        MI_SIZE - 1) >>
473                 MI_SIZE_LOG2;
474         }
475     }
476 
477     for (int32_t x = 0; x < x_range; x += col_step) {
478         uint8_t *p = dst_ptr + ((x * MI_SIZE) << plane_ptr->is_16bit);
479         for (int32_t y = 0; y < y_range;) {
480             // inner loop always filter vertical edges in a MI block. If MI size
481             // is 8x8, it will first filter the vertical edge aligned with a 8x8
482             // block. If 4x4 trasnform is used, it will then filter the internal
483             // edge aligned with a 4x4 block
484             const uint32_t          curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
485             const uint32_t          curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
486             uint32_t                advance_units;
487             TxSize                  tx_size;
488             Av1DeblockingParameters params;
489             memset(&params, 0, sizeof(params));
490 
491             tx_size = set_lpf_parameters(
492                 &params,
493                 //(pcs_ptr->parent_pcs_ptr->av1_cm->mi_stride << scale_vert),
494                 (mi_stride << scale_vert),
495                 pcs_ptr,
496                 HORZ_EDGE,
497                 curr_x,
498                 curr_y,
499                 plane,
500                 plane_ptr);
501             if (tx_size == TX_INVALID) {
502                 params.filter_length = 0;
503                 tx_size              = TX_4X4;
504             }
505 
506             switch (params.filter_length) {
507                 // apply 4-tap filtering
508             case 4:
509                 if (is_16bit) {
510                     svt_aom_highbd_lpf_horizontal_4((uint16_t *)(p), //CONVERT_TO_SHORTPTR(p),
511                                                     dst_stride,
512                                                     params.mblim,
513                                                     params.lim,
514                                                     params.hev_thr,
515                                                     scs_ptr->static_config.encoder_bit_depth);
516                 } else {
517                     svt_aom_lpf_horizontal_4(
518                         p, dst_stride, params.mblim, params.lim, params.hev_thr);
519                 }
520                 break;
521                 // apply 6-tap filtering
522             case 6:
523                 assert(plane != 0);
524                 if (is_16bit) {
525                     svt_aom_highbd_lpf_horizontal_6((uint16_t *)(p), //CONVERT_TO_SHORTPTR(p),
526                                                     dst_stride,
527                                                     params.mblim,
528                                                     params.lim,
529                                                     params.hev_thr,
530                                                     scs_ptr->static_config.encoder_bit_depth);
531                 } else {
532                     svt_aom_lpf_horizontal_6(
533                         p, dst_stride, params.mblim, params.lim, params.hev_thr);
534                 }
535                 break;
536                 // apply 8-tap filtering
537             case 8:
538                 if (is_16bit) {
539                     svt_aom_highbd_lpf_horizontal_8((uint16_t *)(p), //CONVERT_TO_SHORTPTR(p),
540                                                     dst_stride,
541                                                     params.mblim,
542                                                     params.lim,
543                                                     params.hev_thr,
544                                                     scs_ptr->static_config.encoder_bit_depth);
545                 } else {
546                     svt_aom_lpf_horizontal_8(
547                         p, dst_stride, params.mblim, params.lim, params.hev_thr);
548                 }
549                 break;
550                 // apply 14-tap filtering
551             case 14:
552                 if (is_16bit) {
553                     svt_aom_highbd_lpf_horizontal_14((uint16_t *)(p), //CONVERT_TO_SHORTPTR(p),
554                                                      dst_stride,
555                                                      params.mblim,
556                                                      params.lim,
557                                                      params.hev_thr,
558                                                      scs_ptr->static_config.encoder_bit_depth);
559                 } else {
560                     svt_aom_lpf_horizontal_14(
561                         p, dst_stride, params.mblim, params.lim, params.hev_thr);
562                 }
563                 break;
564                 // no filtering
565             default: break;
566             }
567 
568             // advance the destination pointer
569             assert(tx_size < TX_SIZES_ALL);
570             advance_units = tx_size_high_unit[tx_size];
571             y += advance_units;
572             p += ((advance_units * dst_stride * MI_SIZE) << plane_ptr->is_16bit);
573         }
574     }
575 }
576 
577 // New function to filter each sb (64x64)
// Deblocks a single superblock for every plane in [plane_start, plane_end).
//
//   frame_buffer : picture handed to svt_av1_setup_dst_planes(); filtered in place.
//   pcs_ptr      : picture control set; provides the frame header (filter levels and
//                  the combine_vert_horz_lf switch) and the sequence superblock size.
//   mi_row/mi_col: superblock position forwarded to the plane setup and edge filters.
//   plane_start/plane_end : half-open plane range (0 = Y, 1 = U, 2 = V).
//   last_col     : non-zero when this superblock is the last one of its row.
//
// When combine_vert_horz_lf is set, the horizontal pass runs one superblock behind
// the vertical pass: the superblock to the left (mi_col - superblock width) gets its
// horizontal edges filtered now, and the current one is only done here when last_col
// is set. Otherwise both passes are applied to the current superblock.
void loop_filter_sb(EbPictureBufferDesc *frame_buffer, //reconpicture,
                    PictureControlSet *pcs_ptr,
                    int32_t mi_row, int32_t mi_col,
                    int32_t plane_start, int32_t plane_end, uint8_t last_col) {
    SequenceControlSet *scs = pcs_ptr->parent_pcs_ptr->scs_ptr;
    struct LoopFilter * lf  = &pcs_ptr->parent_pcs_ptr->frm_hdr.loop_filter_params;

    // Plane descriptors: luma is not subsampled, both chroma planes are halved in
    // each direction. Sample storage is 16-bit when the buffer is deeper than
    // 8 bits or when the 16-bit pipeline is forced.
    struct MacroblockdPlane pd[3];
    for (int32_t i = 0; i < 3; ++i) {
        const int32_t is_chroma = (i != 0);
        pd[i].subsampling_x     = is_chroma;
        pd[i].subsampling_y     = is_chroma;
        pd[i].plane_type        = is_chroma ? PLANE_TYPE_UV : PLANE_TYPE_Y;
        pd[i].is_16bit          = frame_buffer->bit_depth > 8;
        if (scs->static_config.is_16bit_pipeline)
            pd[i].is_16bit = EB_TRUE;
    }

    // Superblock width in the same units as mi_col.
    const int32_t max_mib_size = scs->seq_header.sb_size == BLOCK_128X128 ? MAX_MIB_SIZE
                                                                          : SB64_MIB_SIZE;

    for (int32_t plane = plane_start; plane < plane_end; ++plane) {
        // Both luma levels at zero ends the whole call (chroma is skipped too);
        // a zero chroma level only skips that chroma plane.
        if (plane == 0 && !lf->filter_level[0] && !lf->filter_level[1])
            break;
        if (plane == 1 && !lf->filter_level_u)
            continue;
        if (plane == 2 && !lf->filter_level_v)
            continue;

        const int32_t combined = lf->combine_vert_horz_lf != 0;

        // Vertical edges of the current superblock (common to both modes).
        svt_av1_setup_dst_planes(
            pd, scs->seq_header.sb_size, frame_buffer, mi_row, mi_col, plane, plane + 1);
        svt_av1_filter_block_plane_vert(pcs_ptr, plane, &pd[plane], mi_row, mi_col);

        if (!combined) {
            // Separate mode: horizontal edges of the same superblock follow directly.
            svt_av1_setup_dst_planes(
                pd, scs->seq_header.sb_size, frame_buffer, mi_row, mi_col, plane, plane + 1);
            svt_av1_filter_block_plane_horz(pcs_ptr, plane, &pd[plane], mi_row, mi_col);
            continue;
        }

        // Combined mode: horizontal edges of the left neighbour, if there is one.
        const int32_t left_mi_col = mi_col - max_mib_size;
        if (left_mi_col >= 0) {
            svt_av1_setup_dst_planes(
                pd, scs->seq_header.sb_size, frame_buffer, mi_row, left_mi_col, plane, plane + 1);
            svt_av1_filter_block_plane_horz(pcs_ptr, plane, &pd[plane], mi_row, left_mi_col);
        }

        // The last superblock of a row has no right neighbour to trigger its
        // horizontal pass later, so it is done here.
        if (last_col) {
            svt_av1_setup_dst_planes(
                pd, scs->seq_header.sb_size, frame_buffer, mi_row, mi_col, plane, plane + 1);
            svt_av1_filter_block_plane_horz(pcs_ptr, plane, &pd[plane], mi_row, mi_col);
        }
    }
}
675 
// Runs the loop filter over a whole picture, one superblock at a time in raster order.
//
//   frame_buffer          : picture to filter (forwarded to loop_filter_sb()).
//   pcs_ptr               : picture control set of the frame being filtered.
//   plane_start/plane_end : half-open range of planes to process.
//
// The per-frame filter tables are initialised first via
// svt_av1_loop_filter_frame_init(); then loop_filter_sb() is called for every
// superblock with its origin expressed in 4-sample units (pixel origin >> 2).
void svt_av1_loop_filter_frame(EbPictureBufferDesc *frame_buffer, PictureControlSet *pcs_ptr,
                               int32_t plane_start, int32_t plane_end) {
    SequenceControlSet *scs_ptr = (SequenceControlSet *)
                                      pcs_ptr->parent_pcs_ptr->scs_wrapper_ptr->object_ptr;
    const uint8_t sb_size_log2 = (uint8_t)svt_log2f(scs_ptr->sb_size_pix);

    // Picture size in superblocks, rounding partial superblocks up.
    const uint32_t sb_cols = (pcs_ptr->parent_pcs_ptr->aligned_width + scs_ptr->sb_size_pix - 1) /
        scs_ptr->sb_size_pix;
    const uint32_t sb_rows = (pcs_ptr->parent_pcs_ptr->aligned_height + scs_ptr->sb_size_pix - 1) /
        scs_ptr->sb_size_pix;

    svt_av1_loop_filter_frame_init(&pcs_ptr->parent_pcs_ptr->frm_hdr,
                                   &pcs_ptr->parent_pcs_ptr->lf_info,
                                   plane_start,
                                   plane_end);

    for (uint32_t sb_row = 0; sb_row < sb_rows; ++sb_row) {
        const uint32_t origin_y = sb_row << sb_size_log2;

        for (uint32_t sb_col = 0; sb_col < sb_cols; ++sb_col) {
            const uint32_t origin_x    = sb_col << sb_size_log2;
            const EbBool   is_last_col = (sb_col == sb_cols - 1) ? EB_TRUE : EB_FALSE;

            loop_filter_sb(frame_buffer,
                           pcs_ptr,
                           origin_y >> 2,
                           origin_x >> 2,
                           plane_start,
                           plane_end,
                           is_last_col);
        }
    }
}
// Prototype for the AC quantizer lookup; its definition is not in this part of the file.
// svt_av1_pick_filter_level() below calls it with the frame's base_q_idx, a delta of 0
// and the encoder bit depth to obtain the q value its filter-level estimate is built from.
extern int16_t svt_av1_ac_quant_q3(int32_t qindex, int32_t delta, AomBitDepth bit_depth);
719 
// Copies one plane of srcBuffer into dstBuffer, together with the picture geometry.
//
//   srcBuffer : picture to copy from.
//   dstBuffer : picture to copy into; its geometry fields and the stride fields of
//               the selected plane are overwritten with the source's values.
//   pcs_ptr   : used only to decide whether samples occupy two bytes
//               (encoder bit depth above 8, or the 16-bit pipeline is enabled).
//   plane     : 0 = Y, 1 = Cb, 2 = Cr. Any other value copies the geometry only.
//
// Only the visible area (width x height starting at origin_x/origin_y) is copied,
// row by row. Chroma planes are treated as half width and half height of luma.
void svt_copy_buffer(EbPictureBufferDesc *srcBuffer, EbPictureBufferDesc *dstBuffer,
                     PictureControlSet *pcs_ptr, uint8_t plane) {
    SequenceControlSet *scs      = pcs_ptr->parent_pcs_ptr->scs_ptr;
    const EbBool        is_16bit = scs->static_config.is_16bit_pipeline
               ? EB_TRUE
               : (EbBool)(scs->static_config.encoder_bit_depth > EB_8BIT);

    // Geometry shared by all planes.
    dstBuffer->origin_x    = srcBuffer->origin_x;
    dstBuffer->origin_y    = srcBuffer->origin_y;
    dstBuffer->width       = srcBuffer->width;
    dstBuffer->height      = srcBuffer->height;
    dstBuffer->max_width   = srcBuffer->max_width;
    dstBuffer->max_height  = srcBuffer->max_height;
    dstBuffer->bit_depth   = srcBuffer->bit_depth;
    dstBuffer->luma_size   = srcBuffer->luma_size;
    dstBuffer->chroma_size = srcBuffer->chroma_size;
    dstBuffer->packed_flag = srcBuffer->packed_flag;

    // Luma row length in bytes and row count; chroma uses half of each.
    const uint16_t luma_row_bytes = (uint16_t)(srcBuffer->width) << is_16bit;
    const uint16_t luma_rows      = (uint16_t)(srcBuffer->height);

    uint8_t *dst_plane;
    uint8_t *src_plane;
    uint32_t first_sample; // byte offset of the top-left visible sample
    uint16_t pitch; // bytes between consecutive rows
    uint16_t row_bytes; // bytes copied per row
    int32_t  rows;

    switch (plane) {
    case 0:
        first_sample = (srcBuffer->origin_x + srcBuffer->origin_y * srcBuffer->stride_y)
            << is_16bit;
        pitch                       = srcBuffer->stride_y << is_16bit;
        dstBuffer->stride_y         = srcBuffer->stride_y;
        dstBuffer->stride_bit_inc_y = srcBuffer->stride_bit_inc_y;
        dst_plane                   = dstBuffer->buffer_y;
        src_plane                   = srcBuffer->buffer_y;
        row_bytes                   = luma_row_bytes;
        rows                        = luma_rows;
        break;
    case 1:
        pitch                        = srcBuffer->stride_cb << is_16bit;
        dstBuffer->stride_cb         = srcBuffer->stride_cb;
        dstBuffer->stride_bit_inc_cb = srcBuffer->stride_bit_inc_cb;
        first_sample =
            (srcBuffer->origin_x / 2 + srcBuffer->origin_y / 2 * srcBuffer->stride_cb) << is_16bit;
        dst_plane = dstBuffer->buffer_cb;
        src_plane = srcBuffer->buffer_cb;
        row_bytes = (luma_row_bytes >> 1);
        rows      = luma_rows / 2;
        break;
    case 2:
        pitch                        = srcBuffer->stride_cr << is_16bit;
        dstBuffer->stride_cr         = srcBuffer->stride_cr;
        dstBuffer->stride_bit_inc_cr = srcBuffer->stride_bit_inc_cr;
        first_sample =
            (srcBuffer->origin_x / 2 + srcBuffer->origin_y / 2 * srcBuffer->stride_cr) << is_16bit;
        dst_plane = dstBuffer->buffer_cr;
        src_plane = srcBuffer->buffer_cr;
        row_bytes = (luma_row_bytes >> 1);
        rows      = luma_rows / 2;
        break;
    default:
        // Unknown plane index: nothing beyond the geometry is copied.
        return;
    }

    // The same byte offset is applied to both pictures (it is derived from the source stride).
    for (int32_t row = 0; row < rows; ++row) {
        svt_memcpy((dst_plane + first_sample + pitch * row),
                   (src_plane + first_sample + pitch * row),
                   row_bytes);
    }
}
783 
784 //int32_t av1_get_max_filter_level(const Av1Comp *cpi) {
785 //    if (cpi->oxcf.pass == 2) {
786 //        return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
787 //            : MAX_LOOP_FILTER;
788 //    }
789 //    else {
790 //        return MAX_LOOP_FILTER;
791 //    }
792 //}
793 
// Measures the distortion of one plane of recon_ptr against the encoder's input picture.
//
//   pcs_ptr   : picture control set; selects the reference input picture
//               (enhanced_picture_ptr for 8-bit, input_frame16bit for 16-bit storage).
//   recon_ptr : reconstructed picture to evaluate.
//   plane     : 0 = Y, 1 = Cb, 2 = Cr.
//
// Returns the value produced by svt_spatial_full_distortion_kernel() (8-bit samples)
// or svt_full_distortion_kernel16_bits() (16-bit samples) over the visible area of
// the plane, or 0 when plane is not 0, 1 or 2.
uint64_t picture_sse_calculations(PictureControlSet *pcs_ptr, EbPictureBufferDesc *recon_ptr,
                                  int32_t plane) {
    SequenceControlSet *scs_ptr  = pcs_ptr->parent_pcs_ptr->scs_ptr;
    const EbBool        is_16bit = scs_ptr->static_config.is_16bit_pipeline ||
        (scs_ptr->static_config.encoder_bit_depth > EB_8BIT);

    const uint32_t ss_x = scs_ptr->subsampling_x;
    const uint32_t ss_y = scs_ptr->subsampling_y;

    EbPictureBufferDesc *input_picture_ptr = is_16bit
        ? (EbPictureBufferDesc *)pcs_ptr->input_frame16bit
        : (EbPictureBufferDesc *)pcs_ptr->parent_pcs_ptr->enhanced_picture_ptr;

    uint8_t *input_buffer;
    uint8_t *recon_coeff_buffer;
    uint32_t input_stride;
    uint32_t recon_stride;
    uint32_t area_width;
    uint32_t area_height;

    // Locate the first visible sample of the plane in both pictures. Buffers are
    // addressed in bytes, so sample offsets are doubled for 16-bit storage
    // (the shift amount is 0 for 8-bit storage). Chroma origins are half the luma origin.
    switch (plane) {
    case 0:
        recon_coeff_buffer = (uint8_t *)(recon_ptr->buffer_y +
                                         ((recon_ptr->origin_x +
                                           recon_ptr->origin_y * recon_ptr->stride_y)
                                          << is_16bit));
        input_buffer       = (uint8_t *)(input_picture_ptr->buffer_y +
                                   ((input_picture_ptr->origin_x +
                                     input_picture_ptr->origin_y * input_picture_ptr->stride_y)
                                    << is_16bit));
        input_stride       = input_picture_ptr->stride_y;
        recon_stride       = recon_ptr->stride_y;
        area_width         = input_picture_ptr->width;
        area_height        = input_picture_ptr->height;
        break;
    case 1:
        recon_coeff_buffer = (uint8_t *)(recon_ptr->buffer_cb +
                                         ((recon_ptr->origin_x / 2 +
                                           recon_ptr->origin_y / 2 * recon_ptr->stride_cb)
                                          << is_16bit));
        input_buffer =
            (uint8_t *)(input_picture_ptr->buffer_cb +
                        ((input_picture_ptr->origin_x / 2 +
                          input_picture_ptr->origin_y / 2 * input_picture_ptr->stride_cb)
                         << is_16bit));
        input_stride = input_picture_ptr->stride_cb;
        recon_stride = recon_ptr->stride_cb;
        area_width   = input_picture_ptr->width >> ss_x;
        area_height  = input_picture_ptr->height >> ss_y;
        break;
    case 2:
        recon_coeff_buffer = (uint8_t *)(recon_ptr->buffer_cr +
                                         ((recon_ptr->origin_x / 2 +
                                           recon_ptr->origin_y / 2 * recon_ptr->stride_cr)
                                          << is_16bit));
        input_buffer =
            (uint8_t *)(input_picture_ptr->buffer_cr +
                        ((input_picture_ptr->origin_x / 2 +
                          input_picture_ptr->origin_y / 2 * input_picture_ptr->stride_cr)
                         << is_16bit));
        input_stride = input_picture_ptr->stride_cr;
        recon_stride = recon_ptr->stride_cr;
        area_width   = input_picture_ptr->width >> ss_x;
        area_height  = input_picture_ptr->height >> ss_y;
        break;
    default: return 0;
    }

    if (is_16bit) {
        return svt_full_distortion_kernel16_bits(input_buffer,
                                                 0,
                                                 input_stride,
                                                 recon_coeff_buffer,
                                                 0,
                                                 recon_stride,
                                                 area_width,
                                                 area_height);
    }
    return svt_spatial_full_distortion_kernel(input_buffer,
                                              0,
                                              input_stride,
                                              recon_coeff_buffer,
                                              0,
                                              recon_stride,
                                              area_width,
                                              area_height);
}
929 
// Evaluates one candidate loop-filter level for a single plane.
//
//   sd, partial_frame    : unused.
//   temp_lf_recon_buffer : unfiltered copy of the reconstruction, used to undo the trial.
//   pcs_ptr              : picture control set of the frame under test.
//   filt_level           : candidate level.
//   plane                : 0 = Y, 1 = U, 2 = V (asserted).
//   dir                  : luma only; 0 updates filter_level[0] and keeps filter_level[1],
//                          1 does the opposite, any other value sets both.
//
// The candidate is written into the frame header (and is left there on return), the
// plane is filtered in place, its distortion against the input is measured with
// picture_sse_calculations(), and finally the plane is restored from
// temp_lf_recon_buffer. Returns the measured distortion.
static int64_t try_filter_frame(
    const EbPictureBufferDesc *sd, EbPictureBufferDesc *temp_lf_recon_buffer,
    PictureControlSet *pcs_ptr, int32_t filt_level, int32_t partial_frame, int32_t plane,
    int32_t dir) {
    (void)sd;
    (void)partial_frame;

    assert(plane >= 0 && plane <= 2);

    SequenceControlSet *scs = pcs_ptr->parent_pcs_ptr->scs_ptr;
    struct LoopFilter * lf  = &pcs_ptr->parent_pcs_ptr->frm_hdr.loop_filter_params;

    // Pick the reconstruction that is actually being built for this picture:
    // the reference object for reference pictures, the enc-dec recon otherwise,
    // in its 16-bit or 8-bit flavour.
    const EbBool use_16bit = scs->static_config.is_16bit_pipeline ||
        (EbBool)(scs->static_config.encoder_bit_depth > EB_8BIT);
    EbPictureBufferDesc *recon_buffer;
    if (pcs_ptr->parent_pcs_ptr->is_used_as_reference_flag == EB_TRUE) {
        EbReferenceObject *ref_obj =
            (EbReferenceObject *)pcs_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr;
        recon_buffer = use_16bit ? ref_obj->reference_picture16bit : ref_obj->reference_picture;
    } else {
        recon_buffer = use_16bit ? pcs_ptr->parent_pcs_ptr->enc_dec_ptr->recon_picture16bit_ptr
                                 : pcs_ptr->parent_pcs_ptr->enc_dec_ptr->recon_picture_ptr;
    }

    // set base filters for use of get_filter_level when in DELTA_Q_LF mode
    switch (plane) {
    case 0:
        // The direction not being searched keeps its current level.
        if (dir != 1)
            lf->filter_level[0] = filt_level;
        if (dir != 0)
            lf->filter_level[1] = filt_level;
        break;
    case 1: lf->filter_level_u = filt_level; break;
    case 2: lf->filter_level_v = filt_level; break;
    default: break;
    }

    svt_av1_loop_filter_frame(recon_buffer, pcs_ptr, plane, plane + 1);

    const int64_t filt_err = picture_sse_calculations(pcs_ptr, recon_buffer, plane);

    // Re-instate the unfiltered frame
    svt_copy_buffer(temp_lf_recon_buffer, recon_buffer, pcs_ptr, (uint8_t)plane);

    return filt_err;
}
// Searches for the loop-filter level that minimises distortion for one plane.
//
//   sd                      : source picture, forwarded to try_filter_frame().
//   temp_lf_recon_buffer    : scratch picture that receives the unfiltered reconstruction.
//   pcs_ptr                 : picture control set of the frame being tuned.
//   partial_frame           : forwarded to try_filter_frame().
//   last_frame_filter_level : starting levels, indexed {Y dir 0, Y dir 1, U, V}.
//   best_cost_ret           : optional; receives the distortion of the chosen level.
//   plane                   : 0 = Y, 1 = U, 2 = V; any other value returns 0.
//   dir                     : luma direction being searched (see try_filter_frame()).
//
// The search starts at the clamped starting level and probes one step below and one
// step above it, preferring lower levels through a bias term. With
// loop_filter_mode <= 2 exactly one such probe round is done with a step of 2;
// otherwise the round is repeated, moving the centre to the best level or halving
// the step when the centre stays put, until the step reaches 0.
// Returns the selected level.
static int32_t search_filter_level(
    EbPictureBufferDesc *sd, // source
    EbPictureBufferDesc *temp_lf_recon_buffer, PictureControlSet *pcs_ptr, int32_t partial_frame,
    const int32_t *last_frame_filter_level, double *best_cost_ret, int32_t plane, int32_t dir) {
    const int32_t min_filter_level = 0;
    const int32_t max_filter_level = MAX_LOOP_FILTER; // av1_get_max_filter_level(cpi);
    FrameHeader * frm_hdr          = &pcs_ptr->parent_pcs_ptr->frm_hdr;

    // Start the search at the previous frame filter level unless it is now out of
    // range.
    int32_t lvl;
    switch (plane) {
    case 0: lvl = last_frame_filter_level[dir]; break;
    case 1: lvl = last_frame_filter_level[2]; break;
    case 2: lvl = last_frame_filter_level[3]; break;
    default: assert(plane >= 0 && plane <= 2); return 0;
    }
    int32_t filt_mid    = clamp(lvl, min_filter_level, max_filter_level);
    int32_t filter_step = filt_mid < 16 ? 4 : filt_mid / 4;

    // Reconstruction currently being built for this picture (same selection as in
    // try_filter_frame()).
    SequenceControlSet *scs       = pcs_ptr->parent_pcs_ptr->scs_ptr;
    const EbBool        use_16bit = scs->static_config.is_16bit_pipeline ||
        (EbBool)(scs->static_config.encoder_bit_depth > EB_8BIT);
    EbPictureBufferDesc *recon_buffer;
    if (pcs_ptr->parent_pcs_ptr->is_used_as_reference_flag == EB_TRUE) {
        EbReferenceObject *ref_obj =
            (EbReferenceObject *)pcs_ptr->parent_pcs_ptr->reference_picture_wrapper_ptr->object_ptr;
        recon_buffer = use_16bit ? ref_obj->reference_picture16bit : ref_obj->reference_picture;
    } else {
        recon_buffer = use_16bit ? pcs_ptr->parent_pcs_ptr->enc_dec_ptr->recon_picture16bit_ptr
                                 : pcs_ptr->parent_pcs_ptr->enc_dec_ptr->recon_picture_ptr;
    }

    // Distortion cache, one slot per level; all-ones bytes make every entry -1,
    // which marks "not evaluated yet".
    int64_t ss_err[MAX_LOOP_FILTER + 1];
    memset(ss_err, 0xFF, sizeof(ss_err));

    // Keep an unfiltered copy so each trial can be undone.
    svt_copy_buffer(recon_buffer, temp_lf_recon_buffer, pcs_ptr, (uint8_t)plane);

    int64_t best_err = try_filter_frame(
        sd, temp_lf_recon_buffer, pcs_ptr, filt_mid, partial_frame, plane, dir);
    int32_t filt_best = filt_mid;
    ss_err[filt_mid]  = best_err;

    // Fast modes do a single probe round with a fixed step of 2.
    const int32_t single_round = pcs_ptr->parent_pcs_ptr->loop_filter_mode <= 2;
    if (single_round)
        filter_step = 2;

    // -1: last move was downwards, +1: upwards, 0: probe both sides.
    int32_t filt_direction = 0;

    while (filter_step > 0) {
        const int32_t filt_high = AOMMIN(filt_mid + filter_step, max_filter_level);
        const int32_t filt_low  = AOMMAX(filt_mid - filter_step, min_filter_level);

        // Bias against raising loop filter in favor of lowering it.
        int64_t bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;

        // yx, bias less for large block size
        if (frm_hdr->tx_mode != ONLY_4X4)
            bias >>= 1;

        if (filt_direction <= 0 && filt_low != filt_mid) {
            // Get Low filter error score
            if (ss_err[filt_low] < 0) {
                ss_err[filt_low] = try_filter_frame(
                    sd, temp_lf_recon_buffer, pcs_ptr, filt_low, partial_frame, plane, dir);
            }
            // If value is close to the best so far then bias towards a lower loop
            // filter value.
            if (ss_err[filt_low] < (best_err + bias)) {
                // Was it actually better than the previous best?
                if (ss_err[filt_low] < best_err)
                    best_err = ss_err[filt_low];
                filt_best = filt_low;
            }
        }

        // Now look at filt_high
        if (filt_direction >= 0 && filt_high != filt_mid) {
            if (ss_err[filt_high] < 0) {
                ss_err[filt_high] = try_filter_frame(
                    sd, temp_lf_recon_buffer, pcs_ptr, filt_high, partial_frame, plane, dir);
            }
            // If value is significantly better than previous best, bias added against
            // raising filter value
            if (ss_err[filt_high] < (best_err - bias)) {
                best_err  = ss_err[filt_high];
                filt_best = filt_high;
            }
        }

        if (single_round)
            break;

        // Half the step distance if the best filter value was the same as last time
        if (filt_best == filt_mid) {
            filter_step /= 2;
            filt_direction = 0;
        } else {
            filt_direction = (filt_best < filt_mid) ? -1 : 1;
            filt_mid       = filt_best;
        }
    }

    // Report the true distortion of the chosen level (the running best_err may
    // belong to a different level because of the bias).
    best_err = ss_err[filt_best];

    if (best_cost_ret)
        *best_cost_ret = (double)best_err; //RDCOST_DBL(x->rdmult, 0, best_err);
    return filt_best;
}
/*
 * Picks the frame-level loop filter strengths for a picture and stores them in
 * pcs_ptr->parent_pcs_ptr->frm_hdr.loop_filter_params.
 *
 *   srcBuffer : source picture; only forwarded to search_filter_level() on the
 *               search path.
 *   pcs_ptr   : picture control set whose frame header is updated.
 *   method    : selection strategy
 *                 - LPF_PICK_MINIMAL_LPF: filter_level[0] and filter_level[1]
 *                   are zeroed; filter_level_u / filter_level_v are not written.
 *                 - any other value >= LPF_PICK_FROM_Q: levels are estimated
 *                   from the AC quantizer derived from base_q_idx.
 *                 - values below LPF_PICK_FROM_Q: levels are obtained from
 *                   search_filter_level().
 *
 * sharpness_level is reset to 0 on every call, regardless of the method.
 */
void svt_av1_pick_filter_level(EbPictureBufferDesc *srcBuffer, // source input
                               PictureControlSet *pcs_ptr, LpfPickMethod method) {
    SequenceControlSet *scs_ptr =
        (SequenceControlSet *)pcs_ptr->parent_pcs_ptr->scs_wrapper_ptr->object_ptr;
    FrameHeader *            frm_hdr = &pcs_ptr->parent_pcs_ptr->frm_hdr;
    struct LoopFilter *const lf      = &frm_hdr->loop_filter_params;

    lf->sharpness_level = 0;

    // Minimal filtering: nothing to estimate or search.
    if (method == LPF_PICK_MINIMAL_LPF) {
        lf->filter_level[1] = 0;
        lf->filter_level[0] = 0;
        return;
    }

    // Search-based selection (every method ordered before LPF_PICK_FROM_Q).
    if (method < LPF_PICK_FROM_Q) {
        // Snapshot of the levels currently held in the header, taken before
        // any of them is overwritten by the searches below.
        const int32_t prev_levels[4] = {
            lf->filter_level[0], lf->filter_level[1], lf->filter_level_u, lf->filter_level_v};
        const int32_t use_subimage = (method == LPF_PICK_FROM_SUBIMAGE);

        // The 16-bit scratch picture is used for a 16-bit pipeline or for any
        // encoder bit depth other than 8 bits.
        EbPictureBufferDesc *scratch_recon;
        if (scs_ptr->static_config.is_16bit_pipeline ||
            scs_ptr->static_config.encoder_bit_depth != EB_8BIT)
            scratch_recon = pcs_ptr->temp_lf_recon_picture16bit_ptr;
        else
            scratch_recon = pcs_ptr->temp_lf_recon_picture_ptr;

        // One search result is shared by filter_level[0] and filter_level[1];
        // both are stored before the next search is started.
        const int32_t shared_level = search_filter_level(
            srcBuffer, scratch_recon, pcs_ptr, use_subimage, prev_levels, NULL, 0, 2);
        lf->filter_level[1] = shared_level;
        lf->filter_level[0] = shared_level;

        lf->filter_level_u = search_filter_level(
            srcBuffer, scratch_recon, pcs_ptr, use_subimage, prev_levels, NULL, 1, 0);
        lf->filter_level_v = search_filter_level(
            srcBuffer, scratch_recon, pcs_ptr, use_subimage, prev_levels, NULL, 2, 0);
        return;
    }

    // Quantizer-based estimate.
    const int32_t level_floor = 0;
    const int32_t level_ceil  = MAX_LOOP_FILTER; // av1_get_max_filter_level(cpi);
    const int32_t q           = svt_av1_ac_quant_q3(frm_hdr->quantization_params.base_q_idx,
                                          0,
                                          (AomBitDepth)scs_ptr->static_config.encoder_bit_depth);
    const int is_key_frame = (frm_hdr->frame_type == KEY_FRAME);

    // The constants below come from a linear fit of the searched level.
    // For 8 bit depth:
    //   Keyframes:    guess = q * 0.06699 - 1.60817
    //   Other frames: guess = q * 0.02295 + 2.48225
    // High bit depth was fitted separately:
    //   guess = q * 0.316206 + 3.87252
    int32_t luma_guess;
    switch (scs_ptr->static_config.encoder_bit_depth) {
    case EB_8BIT:
        if (is_key_frame)
            luma_guess = ROUND_POWER_OF_TWO(q * 17563 - 421574, 18);
        else
            luma_guess = ROUND_POWER_OF_TWO(q * 6017 + 650707, 18);
        break;
    case EB_10BIT:
        luma_guess = ROUND_POWER_OF_TWO(q * 20723 + 4060632, 20);
        break;
    case EB_12BIT:
        luma_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22);
        break;
    default:
        assert(0 &&
               "bit_depth should be AOM_BITS_8, AOM_BITS_10 "
               "or AOM_BITS_12");
        return;
    }

    // Key frames coded above 8 bits get an extra reduction of 4.
    if (is_key_frame && scs_ptr->static_config.encoder_bit_depth != EB_8BIT)
        luma_guess -= 4;

    // Lower the estimate: by 2 when it exceeds 2, by 1 when it is exactly 2;
    // values of 1 or less are kept as they are.
    if (luma_guess > 2)
        luma_guess -= 2;
    else if (luma_guess > 1)
        luma_guess -= 1;

    // Chroma uses half of the luma estimate once that estimate is above 1.
    const int32_t chroma_guess = (luma_guess > 1) ? luma_guess / 2 : luma_guess;

    const int32_t luma_level   = clamp(luma_guess, level_floor, level_ceil);
    const int32_t chroma_level = clamp(chroma_guess, level_floor, level_ceil);

    lf->filter_level[0] = luma_level;
    lf->filter_level[1] = luma_level;
    lf->filter_level_u  = chroma_level;
    lf->filter_level_v  = chroma_level;
}
1245