1 /*
2 * Copyright(c) 2019 Intel Corporation
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10 */
11 
12 #include "EbPictureControlSet.h"
13 #include "EbSequenceControlSet.h"
14 
15 #include "EbSourceBasedOperationsProcess.h"
16 #include "EbInitialRateControlResults.h"
17 #include "EbPictureDemuxResults.h"
18 #ifdef ARCH_X86_64
19 #include <emmintrin.h>
20 #endif
21 #include "EbEncHandle.h"
22 #include "EbUtility.h"
23 #include "EbPictureManagerProcess.h"
24 #include "EbReferenceObject.h"
25 #include "EbTransforms.h"
26 #include "aom_dsp_rtcd.h"
27 #include "EbLog.h"
28 #include "EbIntraPrediction.h"
29 #include "EbMotionEstimation.h"
30 #include "EbEncDecResults.h"
31 #include "EbRateDistortionCost.h"
32 
33 /**************************************
34  * Context
35  **************************************/
36 
/* Private state of a source-based-operations thread. It is allocated by
 * source_based_operations_context_ctor() and released by
 * source_based_operations_context_dctor(). */
typedef struct SourceBasedOperationsContext {
    EbDctor dctor;
    // Consumer FIFO obtained from initial_rate_control_results_resource_ptr in the ctor.
    EbFifo *initial_rate_control_results_input_fifo_ptr;
    // Producer FIFO obtained from picture_demux_results_resource_ptr in the ctor.
    EbFifo *picture_demux_results_output_fifo_ptr;
    // Producer FIFO obtained from tpl_disp_res_srm in the ctor.
    EbFifo *sbo_output_fifo_ptr;
    // The members below are not assigned by the ctor visible in this part of the file.
    uint32_t complete_sb_count;
    uint8_t *y_mean_ptr;
    uint8_t *cr_mean_ptr;
    uint8_t *cb_mean_ptr;
} SourceBasedOperationsContext;
/* Private state of a TPL dispenser thread. It is allocated by
 * tpl_disp_context_ctor() and released by tpl_disp_context_dctor(). */
typedef struct TplDispenserContext {
    EbDctor dctor;
    // Consumer FIFO obtained from tpl_disp_res_srm (ctor argument `index`).
    EbFifo *tpl_disp_input_fifo_ptr;
    // Producer FIFO obtained from tpl_disp_res_srm (ctor argument `tasks_index`).
    EbFifo *tpl_disp_fb_fifo_ptr;
    // Not assigned by the ctor visible in this part of the file.
    uint32_t sb_index;
    uint32_t coded_sb_count;
} TplDispenserContext;
55 
/* Thread-context destructor: frees the SourceBasedOperationsContext that the
 * constructor stored in the thread context's priv member.
 *   p - the owning EbThreadContext, passed as an opaque pointer. */
static void source_based_operations_context_dctor(EbPtr p) {
    EbThreadContext *owner = (EbThreadContext *)p;
    SourceBasedOperationsContext *context = (SourceBasedOperationsContext *)owner->priv;

    EB_FREE_ARRAY(context);
}
61 
62 /************************************************
63 * Source Based Operation Context Constructor
64 ************************************************/
source_based_operations_context_ctor(EbThreadContext * thread_context_ptr,const EbEncHandle * enc_handle_ptr,int index)65 EbErrorType source_based_operations_context_ctor(EbThreadContext *  thread_context_ptr,
66                                                  const EbEncHandle *enc_handle_ptr, int index) {
67     SourceBasedOperationsContext *context_ptr;
68     EB_CALLOC_ARRAY(context_ptr, 1);
69     thread_context_ptr->priv  = context_ptr;
70     thread_context_ptr->dctor = source_based_operations_context_dctor;
71 
72     context_ptr->initial_rate_control_results_input_fifo_ptr =
73         svt_system_resource_get_consumer_fifo(
74             enc_handle_ptr->initial_rate_control_results_resource_ptr, index);
75 
76     context_ptr->sbo_output_fifo_ptr= svt_system_resource_get_producer_fifo(
77         enc_handle_ptr->tpl_disp_res_srm, index);
78     context_ptr->picture_demux_results_output_fifo_ptr = svt_system_resource_get_producer_fifo(
79         enc_handle_ptr->picture_demux_results_resource_ptr, index);
80     return EB_ErrorNone;
81 }
82 
/***************************************************
* Derives BEA statistics and sets activity flags
***************************************************/
/*
 * Scans the non-moving index of every complete SB of the picture and updates:
 *   - non_moving_index_average      : mean index over the complete SBs,
 *   - kf_zeromotion_pct             : percentage of complete SBs whose index is
 *                                     below NON_MOVING_SCORE_1,
 *   - non_moving_index_min_distance : |average - smallest index|,
 *   - non_moving_index_max_distance : |average - largest index|.
 * The average and the percentage are left untouched when the picture has no
 * complete SB; the two distances are always recomputed.
 */
void derive_picture_activity_statistics(PictureParentControlSet *pcs_ptr) {
    const uint32_t total_sbs = pcs_ptr->sb_total_count;
    uint64_t       lowest    = ~0u;
    uint64_t       highest   = 0;
    uint64_t       sum       = 0;
    uint32_t       complete  = 0;
    uint32_t       still     = 0;

    for (uint32_t sb = 0; sb < total_sbs; ++sb) {
        // Only SBs that lie fully inside the picture contribute.
        if (!pcs_ptr->sb_params_array[sb].is_complete_sb)
            continue;

        const uint64_t score = pcs_ptr->non_moving_index_array[sb];
        if (score < lowest)
            lowest = score;
        if (score > highest)
            highest = score;
        if (pcs_ptr->non_moving_index_array[sb] < NON_MOVING_SCORE_1)
            still++;

        sum += score;
        complete++;
    }

    if (complete > 0) {
        pcs_ptr->non_moving_index_average = (uint16_t)(sum / complete);
        pcs_ptr->kf_zeromotion_pct        = (still * 100) / complete;
    }

    const int32_t average = (int32_t)(pcs_ptr->non_moving_index_average);
    pcs_ptr->non_moving_index_min_distance = (uint16_t)(ABS(average - (int32_t)lowest));
    pcs_ptr->non_moving_index_max_distance = (uint16_t)(ABS(average - (int32_t)highest));
}
124 
125 /*
126      TPL dispenser context dctor
127 */
/* Thread-context destructor: frees the TplDispenserContext that the
 * constructor stored in the thread context's priv member.
 *   p - the owning EbThreadContext, passed as an opaque pointer. */
static void tpl_disp_context_dctor(EbPtr p) {
    EbThreadContext *owner = (EbThreadContext *)p;
    TplDispenserContext *context = (TplDispenserContext *)owner->priv;

    EB_FREE_ARRAY(context);
}
/*
     TPL dispenser context ctor
*/
tpl_disp_context_ctor(EbThreadContext * thread_context_ptr,const EbEncHandle * enc_handle_ptr,int index,int tasks_index)136 EbErrorType tpl_disp_context_ctor(EbThreadContext *  thread_context_ptr,
137         const EbEncHandle *enc_handle_ptr, int index, int tasks_index) {
138     TplDispenserContext *context_ptr;
139     EB_CALLOC_ARRAY(context_ptr, 1);
140 
141     thread_context_ptr->priv  = context_ptr;
142     thread_context_ptr->dctor = tpl_disp_context_dctor;
143 
144     context_ptr->tpl_disp_input_fifo_ptr = svt_system_resource_get_consumer_fifo(
145         enc_handle_ptr->tpl_disp_res_srm, index);
146 
147     context_ptr->tpl_disp_fb_fifo_ptr = svt_system_resource_get_producer_fifo(
148         enc_handle_ptr->tpl_disp_res_srm, tasks_index);
149 
150     return EB_ErrorNone;
151 }
152 
153 
// Forward declaration; the definition is not in the visible part of this file.
void tpl_prep_info(PictureParentControlSet    *pcs) ;
155 
156 
157 // Generate lambda factor to tune lambda based on TPL stats
/*
 * Walks the picture in 16x16 blocks and, for each one, sums the TPL statistics
 * that cover it:
 *   intra_sum  += recrf_dist << RDDIV_BITS
 *   mc_dep_sum += (recrf_dist << RDDIV_BITS) + RDCOST(base_rdmult, mc_dep_rate, mc_dep_dist)
 * The block ratio rk = intra_sum / mc_dep_sum (0 unless both sums are positive)
 * is then turned into pcs_ptr->tpl_rdmult_scaling_factors[block]:
 *   rk / pcs_ptr->r0 + 1.2   when mc_dep_cost_base is non-zero,
 *   1.2                      otherwise.
 *
 *   pcs_ptr          - picture whose tpl_stats are read and whose scaling
 *                      factors are written.
 *   mc_dep_cost_base - only tested against zero here.
 */
static void generate_lambda_scaling_factor(PictureParentControlSet *pcs_ptr,
                                           int64_t                  mc_dep_cost_base) {
    Av1Common *  cm           = pcs_ptr->av1_cm;
    const int    sub_shift    = 1 + pcs_ptr->is_720p_or_larger; // mi units -> tpl_stats grid
    const int    step         = 1 << (pcs_ptr->is_720p_or_larger ? 2 : 1);
    const int    mi_cols_sr   = ((pcs_ptr->aligned_width + 15) / 16) << 2;
    const int    blk_mi_w     = mi_size_wide[BLOCK_16X16];
    const int    blk_mi_h     = mi_size_high[BLOCK_16X16];
    const int    blk_cols     = (mi_cols_sr + blk_mi_w - 1) / blk_mi_w;
    const int    blk_rows     = (cm->mi_rows + blk_mi_h - 1) / blk_mi_h;
    const int    stats_stride = mi_cols_sr >> sub_shift;
    const double offset       = 1.2;

    for (int row = 0; row < blk_rows; row++) {
        const int mi_row_end = (row + 1) * blk_mi_h;

        for (int col = 0; col < blk_cols; col++) {
            const int mi_col_end = (col + 1) * blk_mi_w;
            double    intra_sum  = 0.0;
            double    mc_dep_sum = 0.0;

            for (int mi_row = row * blk_mi_h; mi_row < mi_row_end; mi_row += step) {
                // Rows past the bottom of the picture carry no statistics.
                if (mi_row >= cm->mi_rows)
                    continue;
                const int stats_row = (mi_row >> sub_shift) * stats_stride;

                for (int mi_col = col * blk_mi_w; mi_col < mi_col_end; mi_col += step) {
                    if (mi_col >= mi_cols_sr)
                        continue;

                    const TplStats *stats = pcs_ptr->tpl_stats[stats_row + (mi_col >> sub_shift)];
                    const int64_t   delta = RDCOST(
                        pcs_ptr->base_rdmult, stats->mc_dep_rate, stats->mc_dep_dist);
                    const double dist_cost = (double)(stats->recrf_dist << RDDIV_BITS);

                    intra_sum += dist_cost;
                    mc_dep_sum += dist_cost + delta;
                }
            }

            const double rk = (mc_dep_sum > 0 && intra_sum > 0) ? intra_sum / mc_dep_sum : 0;

            double factor = offset;
            if (mc_dep_cost_base)
                factor = rk / pcs_ptr->r0 + offset;
            pcs_ptr->tpl_rdmult_scaling_factors[row * blk_cols + col] = factor;
        }
    }
}
204 
get_quantize_error(MacroblockPlane * p,const TranLow * coeff,TranLow * qcoeff,TranLow * dqcoeff,TxSize tx_size,uint16_t * eob,int64_t * recon_error,int64_t * sse)205 static AOM_INLINE void get_quantize_error(MacroblockPlane *p, const TranLow *coeff, TranLow *qcoeff,
206                                           TranLow *dqcoeff, TxSize tx_size, uint16_t *eob,
207                                           int64_t *recon_error, int64_t *sse) {
208     const ScanOrder *const scan_order =
209         &av1_scan_orders[tx_size][DCT_DCT]; //&av1_default_scan_orders[tx_size]
210     int       pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
211     const int shift   = tx_size == TX_32X32 ? 0 : 2;
212 
213     svt_av1_quantize_fp(coeff,
214                         pix_num,
215                         p->zbin_qtx,
216                         p->round_fp_qtx,
217                         p->quant_fp_qtx,
218                         p->quant_shift_qtx,
219                         qcoeff,
220                         dqcoeff,
221                         p->dequant_qtx,
222                         eob,
223                         scan_order->scan,
224                         scan_order->iscan);
225 
226     *recon_error = svt_av1_block_error(coeff, dqcoeff, pix_num, sse) >> shift;
227     *recon_error = AOMMAX(*recon_error, 1);
228 
229     *sse = (*sse) >> shift;
230     *sse = AOMMAX(*sse, 1);
231 }
232 
/*
 * Rough rate estimate for a quantized block.
 * Starting from 1, every one of the first `eob` coefficients (DCT_DCT scan
 * order) adds (int)(log1p(|level|) / log(2)) + 1; the total is then shifted
 * left by AV1_PROB_COST_SHIFT.
 *
 *   qcoeff  - quantized coefficients.
 *   eob     - number of coefficients to visit; must not exceed the number of
 *             samples in the transform block (asserted).
 *   tx_size - transform size selecting the scan table.
 */
static int rate_estimator(TranLow *qcoeff, int eob, TxSize tx_size) {
    const ScanOrder *const order = &av1_scan_orders[tx_size][DCT_DCT];
    int                    units = 1;

    assert((1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]) >= eob);

    for (int pos = 0; pos < eob; ++pos) {
        const int magnitude = abs(qcoeff[order->scan[pos]]);
        units += 1 + (int)(log1p(magnitude) / log(2.0));
    }

    return (units << AV1_PROB_COST_SHIFT);
}
248 
249 
250 
251 
/*
 * Spreads the statistics gathered for one 16x16 macroblock over the entries of
 * pcs_ptr->tpl_stats that cover it.
 *
 * The four distortion/rate totals in tpl_stats_ptr are divided by the number
 * of mode-info units in a 16x16 block and floored at 1; mv and ref_frame_poc
 * are copied unchanged. No other member of the destination entries is written.
 *
 *   pcs_ptr       - picture owning the tpl_stats grid.
 *   tpl_stats_ptr - statistics of the macroblock.
 *   mb_origin_x/y - position of the macroblock in the picture, in pixels.
 */
static void result_model_store(PictureParentControlSet *pcs_ptr, TplStats  *tpl_stats_ptr,
                               uint32_t mb_origin_x, uint32_t mb_origin_y) {
    const int mi_height       = mi_size_high[BLOCK_16X16];
    const int mi_width        = mi_size_wide[BLOCK_16X16];
    const int mi_count        = mi_height * mi_width;
    const int step            = 1 << (pcs_ptr->is_720p_or_larger ? 2 : 1);
    const int shift           = 3 + pcs_ptr->is_720p_or_larger;
    const int aligned16_width = ((pcs_ptr->aligned_width + 15) / 16) << 4;

    // Per-mi averages, never smaller than 1.
    int64_t srcrf_dist = tpl_stats_ptr->srcrf_dist / mi_count;
    int64_t recrf_dist = tpl_stats_ptr->recrf_dist / mi_count;
    int64_t srcrf_rate = tpl_stats_ptr->srcrf_rate / mi_count;
    int64_t recrf_rate = tpl_stats_ptr->recrf_rate / mi_count;
    if (srcrf_dist < 1)
        srcrf_dist = 1;
    if (recrf_dist < 1)
        recrf_dist = 1;
    if (srcrf_rate < 1)
        srcrf_rate = 1;
    if (recrf_rate < 1)
        recrf_rate = 1;

    const uint32_t stats_col    = mb_origin_x >> shift;
    const uint32_t stats_row    = mb_origin_y >> shift;
    const int      stats_stride = aligned16_width >> shift;

    for (int idy = 0; idy < mi_height; idy += step) {
        // First entry of this grid row; the following entries are reached by
        // stepping the pointer.
        TplStats *dst = pcs_ptr->tpl_stats[(stats_row + (idy >> 1)) * stats_stride + stats_col];

        for (int idx = 0; idx < mi_width; idx += step, ++dst) {
            dst->srcrf_dist    = srcrf_dist;
            dst->recrf_dist    = recrf_dist;
            dst->srcrf_rate    = srcrf_rate;
            dst->recrf_rate    = recrf_rate;
            dst->mv            = tpl_stats_ptr->mv;
            dst->ref_frame_poc = tpl_stats_ptr->ref_frame_poc;
        }
    }
}
285 
286 
// Table read by av1_dc_quant_qtx() when bit_depth is AOM_BITS_8.
// Indexed by qindex + delta clamped to [0, MAXQ].
static const int16_t dc_qlookup_QTX[QINDEX_RANGE] = {
    4,   8,   8,   9,   10,  11,  12,  12,  13,   14,   15,   16,   17,   18,   19,   19,
    20,  21,  22,  23,  24,  25,  26,  26,  27,   28,   29,   30,   31,   32,   32,   33,
    34,  35,  36,  37,  38,  38,  39,  40,  41,   42,   43,   43,   44,   45,   46,   47,
    48,  48,  49,  50,  51,  52,  53,  53,  54,   55,   56,   57,   57,   58,   59,   60,
    61,  62,  62,  63,  64,  65,  66,  66,  67,   68,   69,   70,   70,   71,   72,   73,
    74,  74,  75,  76,  77,  78,  78,  79,  80,   81,   81,   82,   83,   84,   85,   85,
    87,  88,  90,  92,  93,  95,  96,  98,  99,   101,  102,  104,  105,  107,  108,  110,
    111, 113, 114, 116, 117, 118, 120, 121, 123,  125,  127,  129,  131,  134,  136,  138,
    140, 142, 144, 146, 148, 150, 152, 154, 156,  158,  161,  164,  166,  169,  172,  174,
    177, 180, 182, 185, 187, 190, 192, 195, 199,  202,  205,  208,  211,  214,  217,  220,
    223, 226, 230, 233, 237, 240, 243, 247, 250,  253,  257,  261,  265,  269,  272,  276,
    280, 284, 288, 292, 296, 300, 304, 309, 313,  317,  322,  326,  330,  335,  340,  344,
    349, 354, 359, 364, 369, 374, 379, 384, 389,  395,  400,  406,  411,  417,  423,  429,
    435, 441, 447, 454, 461, 467, 475, 482, 489,  497,  505,  513,  522,  530,  539,  549,
    559, 569, 579, 590, 602, 614, 626, 640, 654,  668,  684,  700,  717,  736,  755,  775,
    796, 819, 843, 869, 896, 925, 955, 988, 1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336,
};
305 
// Table read by av1_dc_quant_qtx() when bit_depth is AOM_BITS_10.
// Indexed by qindex + delta clamped to [0, MAXQ].
static const int16_t dc_qlookup_10_QTX[QINDEX_RANGE] = {
    4,    9,    10,   13,   15,   17,   20,   22,   25,   28,   31,   34,   37,   40,   43,   47,
    50,   53,   57,   60,   64,   68,   71,   75,   78,   82,   86,   90,   93,   97,   101,  105,
    109,  113,  116,  120,  124,  128,  132,  136,  140,  143,  147,  151,  155,  159,  163,  166,
    170,  174,  178,  182,  185,  189,  193,  197,  200,  204,  208,  212,  215,  219,  223,  226,
    230,  233,  237,  241,  244,  248,  251,  255,  259,  262,  266,  269,  273,  276,  280,  283,
    287,  290,  293,  297,  300,  304,  307,  310,  314,  317,  321,  324,  327,  331,  334,  337,
    343,  350,  356,  362,  369,  375,  381,  387,  394,  400,  406,  412,  418,  424,  430,  436,
    442,  448,  454,  460,  466,  472,  478,  484,  490,  499,  507,  516,  525,  533,  542,  550,
    559,  567,  576,  584,  592,  601,  609,  617,  625,  634,  644,  655,  666,  676,  687,  698,
    708,  718,  729,  739,  749,  759,  770,  782,  795,  807,  819,  831,  844,  856,  868,  880,
    891,  906,  920,  933,  947,  961,  975,  988,  1001, 1015, 1030, 1045, 1061, 1076, 1090, 1105,
    1120, 1137, 1153, 1170, 1186, 1202, 1218, 1236, 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379,
    1398, 1416, 1436, 1456, 1476, 1496, 1516, 1537, 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717,
    1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, 1958, 1990, 2021, 2054, 2088, 2123, 2159, 2197,
    2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, 2616, 2675, 2737, 2802, 2871, 2944, 3020, 3102,
    3188, 3280, 3375, 3478, 3586, 3702, 3823, 3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347,
};
324 
// Table read by av1_dc_quant_qtx() when bit_depth is AOM_BITS_12.
// Indexed by qindex + delta clamped to [0, MAXQ].
static const int16_t dc_qlookup_12_QTX[QINDEX_RANGE] = {
    4,     12,    18,    25,    33,    41,    50,    60,    70,    80,    91,    103,   115,
    127,   140,   153,   166,   180,   194,   208,   222,   237,   251,   266,   281,   296,
    312,   327,   343,   358,   374,   390,   405,   421,   437,   453,   469,   484,   500,
    516,   532,   548,   564,   580,   596,   611,   627,   643,   659,   674,   690,   706,
    721,   737,   752,   768,   783,   798,   814,   829,   844,   859,   874,   889,   904,
    919,   934,   949,   964,   978,   993,   1008,  1022,  1037,  1051,  1065,  1080,  1094,
    1108,  1122,  1136,  1151,  1165,  1179,  1192,  1206,  1220,  1234,  1248,  1261,  1275,
    1288,  1302,  1315,  1329,  1342,  1368,  1393,  1419,  1444,  1469,  1494,  1519,  1544,
    1569,  1594,  1618,  1643,  1668,  1692,  1717,  1741,  1765,  1789,  1814,  1838,  1862,
    1885,  1909,  1933,  1957,  1992,  2027,  2061,  2096,  2130,  2165,  2199,  2233,  2267,
    2300,  2334,  2367,  2400,  2434,  2467,  2499,  2532,  2575,  2618,  2661,  2704,  2746,
    2788,  2830,  2872,  2913,  2954,  2995,  3036,  3076,  3127,  3177,  3226,  3275,  3324,
    3373,  3421,  3469,  3517,  3565,  3621,  3677,  3733,  3788,  3843,  3897,  3951,  4005,
    4058,  4119,  4181,  4241,  4301,  4361,  4420,  4479,  4546,  4612,  4677,  4742,  4807,
    4871,  4942,  5013,  5083,  5153,  5222,  5291,  5367,  5442,  5517,  5591,  5665,  5745,
    5825,  5905,  5984,  6063,  6149,  6234,  6319,  6404,  6495,  6587,  6678,  6769,  6867,
    6966,  7064,  7163,  7269,  7376,  7483,  7599,  7715,  7832,  7958,  8085,  8214,  8352,
    8492,  8635,  8788,  8945,  9104,  9275,  9450,  9639,  9832,  10031, 10245, 10465, 10702,
    10946, 11210, 11482, 11776, 12081, 12409, 12750, 13118, 13501, 13913, 14343, 14807, 15290,
    15812, 16356, 16943, 17575, 18237, 18949, 19718, 20521, 21387,
};
347 
/*
 * Looks up the table entry for qindex + delta (clamped to [0, MAXQ]) in the
 * table matching bit_depth.
 *
 *   qindex    - base quantizer index.
 *   delta     - offset added to qindex before clamping.
 *   bit_depth - AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12.
 *
 * Returns the table value. Any other bit depth trips the assert and yields -1.
 */
int16_t av1_dc_quant_qtx(int qindex, int delta, AomBitDepth bit_depth) {
    const int16_t *table;

    switch (bit_depth) {
    case AOM_BITS_8: table = dc_qlookup_QTX; break;
    case AOM_BITS_10: table = dc_qlookup_10_QTX; break;
    case AOM_BITS_12: table = dc_qlookup_12_QTX; break;
    default: assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12"); return -1;
    }

    return table[clamp(qindex + delta, 0, MAXQ)];
}
357 
/*
 * Derives an RD multiplier from a quantizer index.
 * With q = av1_dc_quant_qtx(qindex, 0, bit_depth) the multiplier is
 * 3*q^2 + (2*q^2)/3, rounded down by 4 bits for 10-bit and by 8 bits for
 * 12-bit content (ROUND_POWER_OF_TWO).
 *
 *   bit_depth - AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12.
 *   qindex    - quantizer index.
 *
 * Returns the multiplier, never less than 1. Any other bit depth trips the
 * assert and yields -1.
 */
int svt_av1_compute_rd_mult_based_on_qindex(AomBitDepth bit_depth, int qindex) {
    const int dc_q = av1_dc_quant_qtx(qindex, 0, bit_depth);
    const int q_sq = dc_q * dc_q;
    int       mult = q_sq * 3 + (q_sq * 2 / 3);

    if (bit_depth == AOM_BITS_10) {
        mult = ROUND_POWER_OF_TWO(mult, 4);
    } else if (bit_depth == AOM_BITS_12) {
        mult = ROUND_POWER_OF_TWO(mult, 8);
    } else if (bit_depth != AOM_BITS_8) {
        assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
        return -1;
    }

    if (mult < 1)
        mult = 1;
    return mult;
}
371 
// Forward declarations of routines whose definitions are not in the visible
// part of this file.
void svt_av1_build_quantizer(AomBitDepth bit_depth, int32_t y_dc_delta_q, int32_t u_dc_delta_q,
                             int32_t u_ac_delta_q, int32_t v_dc_delta_q, int32_t v_ac_delta_q,
                             Quants *const quants, Dequants *const deq);

double svt_av1_convert_qindex_to_q(int32_t qindex, AomBitDepth bit_depth);

int32_t svt_av1_compute_qdelta(double qstart, double qtarget, AomBitDepth bit_depth);

extern void filter_intra_edge(OisMbResults *ois_mb_results_ptr, uint8_t mode,
                              uint16_t max_frame_width, uint16_t max_frame_height, int32_t p_angle,
                              int32_t cu_origin_x, int32_t cu_origin_y, uint8_t *above_row,
                              uint8_t *left_col);
384 
385 //Given one reference frame identified by the pair (list_index,ref_index)
386 //indicate if ME data is valid
is_me_data_valid(const MeSbResults * me_results,uint32_t me_mb_offset,uint8_t list_idx,uint8_t ref_idx)387 static uint8_t is_me_data_valid(const MeSbResults *me_results, uint32_t me_mb_offset,
388                                 uint8_t list_idx, uint8_t ref_idx) {
389     uint8_t            total_me_cnt = me_results->total_me_candidate_index[me_mb_offset];
390     const MeCandidate *me_block_results =
391         &me_results->me_candidate_array[me_mb_offset * MAX_PA_ME_CAND];
392 
393     for (uint32_t me_cand_i = 0; me_cand_i < total_me_cnt; ++me_cand_i) {
394         const MeCandidate *me_cand = &me_block_results[me_cand_i];
395         assert(/*me_cand->direction >= 0 && */ me_cand->direction <= 2);
396         if (me_cand->direction == 0 || me_cand->direction == 2) {
397             if (list_idx == me_cand->ref0_list && ref_idx == me_cand->ref_idx_l0)
398                 return 1;
399         }
400         if (me_cand->direction == 1 || me_cand->direction == 2) {
401             if (list_idx == me_cand->ref1_list && ref_idx == me_cand->ref_idx_l1)
402                 return 1;
403         }
404     }
405     return 0;
406 }
407 
408 
/*
 * Clamps a motion vector so that the 16x16 block it points to stays inside the
 * reference picture extended by TPL_PADX / TPL_PADY pixels on every side.
 *
 *   ref_pic_ptr   - reference picture; only max_width / max_height are read.
 *   mb_origin_x/y - top-left corner of the block in the picture, in pixels.
 *   x_curr_mv     - in/out: horizontal vector component.
 *   y_curr_mv     - in/out: vertical vector component.
 *
 * The components carry 3 fractional bits: the code uses (mv >> 3) as the
 * integer pixel displacement.
 *
 * The clamped value is built by multiplying by 8 rather than shifting left by
 * 3. The operand is negative whenever the left/top clamp triggers, and
 * left-shifting a negative value is undefined behaviour in C. The product is
 * identical wherever the shift was defined.
 *
 * NOTE(review): origins and results are held in int16_t, as in the original
 * interface; very large pictures could exceed that range - confirm against
 * the supported resolutions.
 */
void clip_mv_in_pad(
    EbPictureBufferDesc *ref_pic_ptr,
    uint32_t mb_origin_x,
    uint32_t mb_origin_y,
    int16_t *x_curr_mv,
    int16_t *y_curr_mv)
{
    // Search area adjustment
    int16_t blk_origin_x = mb_origin_x;
    int16_t blk_origin_y = mb_origin_y;
    int16_t bwidth = 16;
    int16_t bheight = 16;
    int16_t mvx = *x_curr_mv;
    int16_t mvy = *y_curr_mv;
    int16_t padx = TPL_PADX;
    int16_t pady = TPL_PADY;

    // Left edge: the block may not start before -padx.
    if ((blk_origin_x + (mvx >> 3)) < -padx)
        mvx = (-padx - blk_origin_x) * 8;

    // Right edge: the block may not end past padx + max_width - 1.
    if ((blk_origin_x + bwidth + (mvx >> 3)) > (padx + ref_pic_ptr->max_width - 1))
        mvx = ((padx + ref_pic_ptr->max_width - 1) - (blk_origin_x + bwidth)) * 8;

    // Top edge.
    if ((blk_origin_y + (mvy >> 3)) < -pady)
        mvy = (-pady - blk_origin_y) * 8;

    // Bottom edge.
    if ((blk_origin_y + bheight + (mvy >> 3)) > (pady + ref_pic_ptr->max_height - 1))
        mvy = ((pady + ref_pic_ptr->max_height - 1) - (blk_origin_y + bheight)) * 8;

    *x_curr_mv = mvx;
    *y_curr_mv = mvy;
}
441 // Reference pruning, Loop over all available references and get the best reference idx based on SAD
/*
 * Reference pruning: evaluates every usable reference frame for one 16x16
 * block and reports the one with the smallest sub-sampled SAD.
 *
 *   pcs_ptr        - picture being analysed (source picture, TPL reference
 *                    list and ME results are read from it).
 *   sb_index       - SB that contains the block (selects the ME results).
 *   me_mb_offset   - index of the block inside the SB's ME results.
 *   mb_origin_x/y  - top-left corner of the block in the picture, in pixels.
 *   best_reference - out: index (0..MAX_PA_ME_MV-1) of the best reference;
 *                    indices 0-3 address list 0, 4 and above address list 1.
 *                    Left untouched when no reference qualifies.
 *
 * A reference is skipped when it lies beyond tpl_ref0_count / tpl_ref1_count
 * or when is_me_data_valid() reports no ME candidate for it.
 *
 * The ME vector components are doubled by multiplication rather than by a
 * left shift: the components can be negative, and left-shifting a negative
 * value is undefined behaviour in C. The product is identical wherever the
 * shift was defined.
 */
void get_best_reference(
    PictureParentControlSet *pcs_ptr,
    uint32_t sb_index,
    uint32_t   me_mb_offset,
    uint32_t mb_origin_x,
    uint32_t mb_origin_y,
    uint32_t *best_reference )
{
    EbPictureBufferDesc *input_ptr          = pcs_ptr->enhanced_picture_ptr;
    const uint32_t       max_inter_ref      = MAX_PA_ME_MV;
    uint32_t             best_reference_sad = UINT32_MAX;
    uint8_t *            src_mb = input_ptr->buffer_y + input_ptr->origin_x + mb_origin_x +
        (input_ptr->origin_y + mb_origin_y) * input_ptr->stride_y;

    for (uint32_t rf_idx = 0; rf_idx < max_inter_ref; rf_idx++) {
        const uint32_t list_index    = rf_idx < 4 ? 0 : 1;
        const uint32_t ref_pic_index = rf_idx >= 4 ? (rf_idx - 4) : rf_idx;

        // Skip slots beyond the number of references available in this list.
        if ((list_index == 0 && (ref_pic_index + 1) > pcs_ptr->tpl_data.tpl_ref0_count) ||
            (list_index == 1 && (ref_pic_index + 1) > pcs_ptr->tpl_data.tpl_ref1_count))
            continue;

        const MeSbResults *me_results = pcs_ptr->pa_me_data->me_results[sb_index];
        if (!is_me_data_valid(me_results, me_mb_offset, list_index, ref_pic_index))
            continue;

        EbPictureBufferDesc *ref_pic_ptr = (EbPictureBufferDesc *)pcs_ptr->tpl_data
                                               .tpl_ref_ds_ptr_array[list_index][ref_pic_index]
                                               .picture_ptr;

        // Fetch the ME vector of this reference and double it.
        // Presumably ME stores vectors at half the precision expected by
        // clip_mv_in_pad() - confirm against the ME stage.
        const uint32_t mv_idx =
            me_mb_offset * MAX_PA_ME_MV + (list_index ? 4 : 0) + ref_pic_index;
        int16_t x_curr_mv = (int16_t)(me_results->me_mv_array[mv_idx].x_mv * 2);
        int16_t y_curr_mv = (int16_t)(me_results->me_mv_array[mv_idx].y_mv * 2);
        clip_mv_in_pad(ref_pic_ptr, mb_origin_x, mb_origin_y, &x_curr_mv, &y_curr_mv);

        MV      best_mv          = {y_curr_mv, x_curr_mv};
        int32_t ref_origin_index = ref_pic_ptr->origin_x + (mb_origin_x + (best_mv.col >> 3)) +
            (mb_origin_y + (best_mv.row >> 3) + ref_pic_ptr->origin_y) * ref_pic_ptr->stride_y;

        const uint32_t reference_sad =
            svt_nxm_sad_kernel_sub_sampled(src_mb,
                                           input_ptr->stride_y,
                                           ref_pic_ptr->buffer_y + ref_origin_index,
                                           ref_pic_ptr->stride_y,
                                           16,
                                           16);
        if (reference_sad < best_reference_sad) {
            best_reference_sad = reference_sad;
            *best_reference    = rf_idx;
        }
    }
}
501 
502 
503 
504 
505 
506 /*
507     TPL Dispenser SB based (sz 64x64)
508 */
tpl_mc_flow_dispenser_sb(EncodeContext * encode_context_ptr,SequenceControlSet * scs_ptr,PictureParentControlSet * pcs_ptr,int32_t frame_idx,uint32_t sb_index,int32_t qIndex)509 void tpl_mc_flow_dispenser_sb(
510     EncodeContext                   *encode_context_ptr,
511     SequenceControlSet              *scs_ptr,
512     PictureParentControlSet        *pcs_ptr,
513     int32_t                          frame_idx,
514     uint32_t                        sb_index,
515     int32_t                         qIndex)
516 {
517 {
518     uint32_t             picture_width_in_mb = (pcs_ptr->enhanced_picture_ptr->width + 16 - 1) / 16;
519     int16_t              x_curr_mv           = 0;
520     int16_t              y_curr_mv           = 0;
521     uint32_t             me_mb_offset        = 0;
522     TxSize               tx_size             = TX_16X16;
523     EbPictureBufferDesc *ref_pic_ptr;
524     BlockGeom            blk_geom;
525     EbPictureBufferDesc *input_picture_ptr = pcs_ptr->enhanced_picture_ptr;
526     EbPictureBufferDesc *recon_picture_ptr =
527         encode_context_ptr->mc_flow_rec_picture_buffer[frame_idx];
528     TplStats tpl_stats;
529 
530     DECLARE_ALIGNED(32, uint8_t, predictor8[256 * 2]);
531     DECLARE_ALIGNED(32, int16_t, src_diff[256]);
532     DECLARE_ALIGNED(32, TranLow, coeff[256]);
533     DECLARE_ALIGNED(32, TranLow, qcoeff[256]);
534     DECLARE_ALIGNED(32, TranLow, dqcoeff[256]);
535     DECLARE_ALIGNED(32, TranLow, best_coeff[256]);
536     uint8_t *predictor = predictor8;
537 
538     blk_geom.bwidth  = 16;
539     blk_geom.bheight = 16;
540 
541     MacroblockPlane mb_plane;
542     mb_plane.quant_qtx       = scs_ptr->quants_8bit.y_quant[qIndex];
543     mb_plane.quant_fp_qtx    = scs_ptr->quants_8bit.y_quant_fp[qIndex];
544     mb_plane.round_fp_qtx    = scs_ptr->quants_8bit.y_round_fp[qIndex];
545     mb_plane.quant_shift_qtx = scs_ptr->quants_8bit.y_quant_shift[qIndex];
546     mb_plane.zbin_qtx        = scs_ptr->quants_8bit.y_zbin[qIndex];
547     mb_plane.round_qtx       = scs_ptr->quants_8bit.y_round[qIndex];
548     mb_plane.dequant_qtx     = scs_ptr->deq_8bit.y_dequant_qtx[qIndex];
549 
550     EbPictureBufferDesc *input_ptr = pcs_ptr->enhanced_picture_ptr;
551     const uint8_t tpl_opt_flag = pcs_ptr->tpl_ctrls.tpl_opt_flag;
552 
553 
554     SbParams *sb_params    = &scs_ptr->sb_params_array[sb_index];
555     uint32_t  pa_blk_index = 0;
556     while (pa_blk_index < CU_MAX_COUNT) {
557         const CodedBlockStats *blk_stats_ptr;
558         blk_stats_ptr              = get_coded_blk_stats(pa_blk_index);
559         uint8_t bsize              = blk_stats_ptr->size;
560         EbBool  small_boundary_blk = EB_FALSE;
561 
562         {
563             uint32_t cu_origin_x = sb_params->origin_x + blk_stats_ptr->origin_x;
564             uint32_t cu_origin_y = sb_params->origin_y + blk_stats_ptr->origin_y;
565             if ((blk_stats_ptr->origin_x % 16) == 0 &&
566                 (blk_stats_ptr->origin_y % 16) == 0 &&
567                 ((pcs_ptr->enhanced_picture_ptr->width - cu_origin_x) < 16 ||
568                     (pcs_ptr->enhanced_picture_ptr->height - cu_origin_y) < 16))
569                 small_boundary_blk = EB_TRUE;
570         }
571         if (bsize != 16 && !small_boundary_blk) {
572             pa_blk_index++;
573             continue;
574         }
575         if (sb_params->raster_scan_blk_validity[md_scan_to_raster_scan[pa_blk_index]]) {
576             uint32_t  mb_origin_x       = sb_params->origin_x + blk_stats_ptr->origin_x;
577             uint32_t  mb_origin_y       = sb_params->origin_y + blk_stats_ptr->origin_y;
578             const int dst_buffer_stride = recon_picture_ptr->stride_y;
579             const int dst_mb_offset     = mb_origin_y * dst_buffer_stride + mb_origin_x;
580             const int dst_basic_offset  = recon_picture_ptr->origin_y *
581                     recon_picture_ptr->stride_y +
582                 recon_picture_ptr->origin_x;
583             uint8_t *dst_buffer = recon_picture_ptr->buffer_y + dst_basic_offset +
584                 dst_mb_offset;
585 
586             int64_t  inter_cost;
587             int64_t  recon_error = 1, sse = 1;
588             uint64_t best_ref_poc    = 0;
589             int32_t  best_rf_idx     = -1;
590             int64_t  best_inter_cost = INT64_MAX;
591             MV       final_best_mv   = {0, 0};
592             uint32_t max_inter_ref   = MAX_PA_ME_MV;
593 
594             PredictionMode best_intra_mode = DC_PRED;
595             int64_t        best_intra_cost = INT64_MAX;
596             // Disable intra prediction
597             uint8_t disable_intra_pred  = tpl_opt_flag && (pcs_ptr->tpl_ctrls.disable_intra_pred_nref ||
598                 pcs_ptr->tpl_ctrls.disable_intra_pred_nbase);
599             if (!disable_intra_pred ||
600                 (pcs_ptr->tpl_ctrls.disable_intra_pred_nref && pcs_ptr->tpl_data.is_used_as_reference_flag) ||
601                 (pcs_ptr->tpl_ctrls.disable_intra_pred_nbase && pcs_ptr->tpl_data.tpl_temporal_layer_index == 0)){
602                 if (scs_ptr->in_loop_ois == 0) {
603                     OisMbResults *ois_mb_results_ptr =
604                         pcs_ptr->ois_mb_results[(mb_origin_y >> 4) * picture_width_in_mb +
605                                                 (mb_origin_x >> 4)];
606                     best_intra_mode = ois_mb_results_ptr->intra_mode;
607                     best_intra_cost = ois_mb_results_ptr->intra_cost;
608 
609                 } else { // ois
610                     // always process as block16x16 even bsize or tx_size is 8x8
611                     bsize = 16;
612                     DECLARE_ALIGNED(16, uint8_t, left0_data[MAX_TX_SIZE * 2 + 32]);
613                     DECLARE_ALIGNED(16, uint8_t, above0_data[MAX_TX_SIZE * 2 + 32]);
614                     DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 32]);
615                     DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 32]);
616 
617                     uint8_t *above_row;
618                     uint8_t *left_col;
619                     uint8_t *above0_row;
620                     uint8_t *left0_col;
621                     above0_row = above0_data + 16;
622                     left0_col  = left0_data + 16;
623                     above_row  = above_data + 16;
624                     left_col   = left_data + 16;
625 
626                     uint8_t *src = input_ptr->buffer_y +
627                         pcs_ptr->enhanced_picture_ptr->origin_x + mb_origin_x +
628                         (pcs_ptr->enhanced_picture_ptr->origin_y + mb_origin_y) *
629                             input_ptr->stride_y;
630 
631                     // Fill Neighbor Arrays
632                     update_neighbor_samples_array_open_loop_mb(
633                                                                 1, // use_top_righ_bottom_left
634                                                                 1, // update_top_neighbor
635                                                                 above0_row - 1,
636                                                                 left0_col - 1,
637                                                                 input_ptr,
638                                                                 input_ptr->stride_y,
639                                                                 mb_origin_x,
640                                                                 mb_origin_y,
641                                                                 bsize,
642                                                                 bsize);
643 
644                     uint8_t ois_intra_mode;
645                     uint8_t intra_mode_start = DC_PRED;
646                     EbBool  enable_paeth  = pcs_ptr->scs_ptr->static_config.enable_paeth ==
647                             DEFAULT
648                             ? EB_TRUE
649                             : (EbBool)pcs_ptr->scs_ptr->static_config.enable_paeth;
650                     EbBool  enable_smooth = pcs_ptr->scs_ptr->static_config.enable_smooth ==
651                             DEFAULT
652                             ? EB_TRUE
653                             : (EbBool)pcs_ptr->scs_ptr->static_config.enable_smooth;
654                     uint8_t intra_mode_end =
655                     pcs_ptr->tpl_ctrls.tpl_opt_flag
656 
657                         ? DC_PRED
658                         : enable_paeth      ? PAETH_PRED
659                             : enable_smooth ? SMOOTH_H_PRED
660                                             : D67_PRED;
661 
662                     for (ois_intra_mode = intra_mode_start;
663                             ois_intra_mode <= intra_mode_end;
664                             ++ois_intra_mode) {
665                         int32_t p_angle = av1_is_directional_mode(
666                                                 (PredictionMode)ois_intra_mode)
667                             ? mode_to_angle_map[(PredictionMode)ois_intra_mode]
668                             : 0;
669                         // Edge filter
670                         if (av1_is_directional_mode((PredictionMode)ois_intra_mode) &&
671                             1 /*scs_ptr->seq_header.enable_intra_edge_filter*/) {
672                             EB_MEMCPY(left_data,
673                                         left0_data,
674                                         sizeof(uint8_t) * (MAX_TX_SIZE * 2 + 32));
675                             EB_MEMCPY(above_data,
676                                         above0_data,
677                                         sizeof(uint8_t) * (MAX_TX_SIZE * 2 + 32));
678                             above_row = above_data + 16;
679                             left_col  = left_data + 16;
680                             filter_intra_edge(NULL,
681                                                 ois_intra_mode,
682                                                 scs_ptr->seq_header.max_frame_width,
683                                                 scs_ptr->seq_header.max_frame_height,
684                                                 p_angle,
685                                                 (int32_t)mb_origin_x,
686                                                 (int32_t)mb_origin_y,
687                                                 above_row,
688                                                 left_col);
689                         } else {
690                             above_row = above0_row;
691                             left_col  = left0_col;
692                         }
693                         // PRED
694                         intra_prediction_open_loop_mb(p_angle,
695                                                         ois_intra_mode,
696                                                         mb_origin_x,
697                                                         mb_origin_y,
698                                                         tx_size,
699                                                         above_row,
700                                                         left_col,
701                                                         predictor,
702                                                         16);
703 
704                         // Distortion
705                         int64_t intra_cost;
706                         if (pcs_ptr->tpl_ctrls.tpl_opt_flag && pcs_ptr->tpl_ctrls.use_pred_sad_in_intra_search) {
707                             intra_cost = svt_nxm_sad_kernel_sub_sampled(
708                                 src,
709                                 input_ptr->stride_y,
710                                 predictor,
711                                 16,
712                                 16,
713                                 16);
714                         }
715                         else {
716                             svt_aom_subtract_block(
717                                 16, 16, src_diff, 16, src, input_ptr->stride_y, predictor, 16);
718                             EB_TRANS_COEFF_SHAPE pf_shape = pcs_ptr->tpl_ctrls.tpl_opt_flag ? pcs_ptr->tpl_ctrls.pf_shape : DEFAULT_SHAPE;
719                             svt_av1_wht_fwd_txfm(src_diff, 16, coeff, tx_size, pf_shape, 8, 0);
720                              intra_cost = svt_aom_satd(coeff, 16 * 16);
721                         }
722 
723                         if (intra_cost < best_intra_cost) {
724                             best_intra_cost = intra_cost;
725                             best_intra_mode = ois_intra_mode;
726                         }
727                     }
728                 }
729             }
730             uint8_t  best_mode = DC_PRED;
731             uint8_t *src_mb    = input_picture_ptr->buffer_y + input_picture_ptr->origin_x +
732                 mb_origin_x +
733                 (input_picture_ptr->origin_y + mb_origin_y) * input_picture_ptr->stride_y;
734             memset(&tpl_stats, 0, sizeof(tpl_stats));
735             blk_geom.origin_x = blk_stats_ptr->origin_x;
736             blk_geom.origin_y = blk_stats_ptr->origin_y;
737             me_mb_offset      = get_me_info_index(
738                 pcs_ptr->max_number_of_pus_per_sb, &blk_geom, 0, 0);
739 
740             uint32_t best_reference = 0;
741             if (pcs_ptr->tpl_ctrls.tpl_opt_flag && pcs_ptr->tpl_ctrls.get_best_ref)
742                 // Reference pruning
743                 get_best_reference(pcs_ptr,
744                                     sb_index,
745                                     me_mb_offset,
746                                     mb_origin_x,
747                                     mb_origin_y,
748                                     &best_reference);
749 
750             for (uint32_t rf_idx = 0; rf_idx < max_inter_ref; rf_idx++) {
751                 if (pcs_ptr->tpl_ctrls.get_best_ref)
752                     if (rf_idx != best_reference)
753                         continue;
754                 uint32_t list_index    = rf_idx < 4 ? 0 : 1;
755                 uint32_t ref_pic_index = rf_idx >= 4 ? (rf_idx - 4) : rf_idx;
756                 if ((list_index == 0 &&
757                         (ref_pic_index + 1) > pcs_ptr->tpl_data.tpl_ref0_count) ||
758                     (list_index == 1 &&
759                         (ref_pic_index + 1) > pcs_ptr->tpl_data.tpl_ref1_count))
760                     continue;
761                 if (!is_me_data_valid(pcs_ptr->pa_me_data->me_results[sb_index],
762                                         me_mb_offset,
763                                         list_index,
764                                         ref_pic_index))
765                     continue;
766                 ref_pic_ptr = (EbPictureBufferDesc *)pcs_ptr->tpl_data
767                                     .tpl_ref_ds_ptr_array[list_index][ref_pic_index]
768                                     .picture_ptr;
769                 const MeSbResults *me_results = pcs_ptr->pa_me_data->me_results[sb_index];
770                 x_curr_mv                     = me_results
771                                 ->me_mv_array[me_mb_offset * MAX_PA_ME_MV +
772                                                 (list_index ? 4 : 0) + ref_pic_index]
773                                 .x_mv
774                     << 1;
775                 y_curr_mv = me_results
776                                 ->me_mv_array[me_mb_offset * MAX_PA_ME_MV +
777                                                 (list_index ? 4 : 0) + ref_pic_index]
778                                 .y_mv
779                     << 1;
780                 clip_mv_in_pad(ref_pic_ptr,mb_origin_x,mb_origin_y,&x_curr_mv,&y_curr_mv);
781                 MV      best_mv          = {y_curr_mv, x_curr_mv};
782                 if (pcs_ptr->tpl_ctrls.tpl_opt_flag && pcs_ptr->tpl_ctrls.use_pred_sad_in_inter_search) {
783                     int32_t ref_origin_index = ref_pic_ptr->origin_x +
784                         (mb_origin_x + (best_mv.col >> 3)) +
785                         (mb_origin_y + (best_mv.row >> 3) +
786                             ref_pic_ptr->origin_y) * ref_pic_ptr->stride_y;
787                     //sad_1
788                     inter_cost = svt_nxm_sad_kernel_sub_sampled(
789                         src_mb,
790                         input_ptr->stride_y,
791                         ref_pic_ptr->buffer_y + ref_origin_index,
792                         ref_pic_ptr->stride_y,
793                         16,
794                         16);
795                 }
796                 else {
797                     int32_t ref_origin_index = ref_pic_ptr->origin_x +
798                         (mb_origin_x + (best_mv.col >> 3)) +
799                         (mb_origin_y + (best_mv.row >> 3) + ref_pic_ptr->origin_y) *
800                         ref_pic_ptr->stride_y;
801 
802                     svt_aom_subtract_block(16,
803                         16,
804                         src_diff,
805                         16,
806                         src_mb,
807                         input_picture_ptr->stride_y,
808                         ref_pic_ptr->buffer_y + ref_origin_index,
809                         ref_pic_ptr->stride_y);
810                     EB_TRANS_COEFF_SHAPE pf_shape = pcs_ptr->tpl_ctrls.tpl_opt_flag ? pcs_ptr->tpl_ctrls.pf_shape : DEFAULT_SHAPE;
811                     svt_av1_wht_fwd_txfm(src_diff, 16, coeff, tx_size, pf_shape, 8, 0);
812 
813                     inter_cost = svt_aom_satd(coeff, 256);
814                 }
815                 if (inter_cost < best_inter_cost) {
816                     if (!(pcs_ptr->tpl_ctrls.tpl_opt_flag && pcs_ptr->tpl_ctrls.use_pred_sad_in_inter_search))
817                     EB_MEMCPY(best_coeff, coeff, sizeof(best_coeff));
818                     best_ref_poc = pcs_ptr->tpl_data
819                                         .tpl_ref_ds_ptr_array[list_index][ref_pic_index]
820                                         .picture_number;
821 
822                     best_rf_idx     = rf_idx;
823                     best_inter_cost = inter_cost;
824                     final_best_mv   = best_mv;
825 
826                     if (best_inter_cost < best_intra_cost)
827                         best_mode = NEWMV;
828                 }
829             } // rf_idx
830 
831             if (best_mode == NEWMV) {
832                 uint16_t eob = 0;
833                 if (pcs_ptr->tpl_ctrls.tpl_opt_flag && pcs_ptr->tpl_ctrls.use_pred_sad_in_inter_search) {
834                     uint32_t list_index = best_rf_idx < 4 ? 0 : 1;
835                     uint32_t ref_pic_index = best_rf_idx >= 4 ? (best_rf_idx - 4) : best_rf_idx;
836 
837                     ref_pic_ptr = (EbPictureBufferDesc*)pcs_ptr->tpl_data.tpl_ref_ds_ptr_array[list_index][ref_pic_index].picture_ptr;
838 
839                     int32_t ref_origin_index = ref_pic_ptr->origin_x +
840                         (mb_origin_x + (final_best_mv.col >> 3)) +
841                         (mb_origin_y + (final_best_mv.row >> 3) +
842                             ref_pic_ptr->origin_y) * ref_pic_ptr->stride_y;
843                     svt_aom_subtract_block(16, 16, src_diff, 16, src_mb, input_picture_ptr->stride_y,
844                         ref_pic_ptr->buffer_y + ref_origin_index, ref_pic_ptr->stride_y);
845                     EB_TRANS_COEFF_SHAPE pf_shape = pcs_ptr->tpl_ctrls.tpl_opt_flag ? pcs_ptr->tpl_ctrls.pf_shape : DEFAULT_SHAPE;
846                     svt_av1_wht_fwd_txfm(src_diff, 16, coeff, tx_size, pf_shape, 8, 0);
847                     memcpy(best_coeff, coeff, sizeof(best_coeff));
848                 }
849                 get_quantize_error(&mb_plane,
850                                     best_coeff,
851                                     qcoeff,
852                                     dqcoeff,
853                                     tx_size,
854                                     &eob,
855                                     &recon_error,
856                                     &sse);
857                 int rate_cost = pcs_ptr->tpl_ctrls.tpl_opt_flag ? 0 : rate_estimator(qcoeff, eob, tx_size);
858 
859                 tpl_stats.srcrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
860                 tpl_stats.srcrf_dist = recon_error << (TPL_DEP_COST_SCALE_LOG2);
861             }
862             if (best_mode == NEWMV) {
863                 // inter recon with rec_picture as reference pic
864                 uint64_t ref_poc       = best_ref_poc;
865                 uint32_t list_index    = best_rf_idx < 4 ? 0 : 1;
866                 uint32_t ref_pic_index = best_rf_idx >= 4 ? (best_rf_idx - 4) : best_rf_idx;
867                 if (pcs_ptr->tpl_data.ref_in_slide_window[list_index][ref_pic_index]) {
868                     uint32_t ref_frame_idx = 0;
869                     while (ref_frame_idx < MAX_TPL_LA_SW &&
870                             encode_context_ptr->poc_map_idx[ref_frame_idx] != ref_poc)
871                         ref_frame_idx++;
872                     assert(ref_frame_idx != MAX_TPL_LA_SW);
873                     ref_pic_ptr =
874                         encode_context_ptr->mc_flow_rec_picture_buffer[ref_frame_idx];
875                 } else
876                     ref_pic_ptr = (EbPictureBufferDesc *)pcs_ptr->tpl_data
877                                         .tpl_ref_ds_ptr_array[list_index][ref_pic_index]
878                                         .picture_ptr;
879                 int32_t ref_origin_index = ref_pic_ptr->origin_x +
880                     (mb_origin_x + (final_best_mv.col >> 3)) +
881                     (mb_origin_y + (final_best_mv.row >> 3) + ref_pic_ptr->origin_y) *
882                         ref_pic_ptr->stride_y;
883                 for (int i = 0; i < 16; ++i)
884                     EB_MEMCPY(dst_buffer + i * dst_buffer_stride,
885                                 ref_pic_ptr->buffer_y + ref_origin_index +
886                                     i * ref_pic_ptr->stride_y,
887                                 sizeof(uint8_t) * (16));
888             } else {
889                 // intra recon
890 
891                 uint8_t *above_row;
892                 uint8_t *left_col;
893                 DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 32]);
894                 DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 32]);
895 
896                 above_row             = above_data + 16;
897                 left_col              = left_data + 16;
898                 uint8_t *recon_buffer = recon_picture_ptr->buffer_y + dst_basic_offset;
899 
900                 update_neighbor_samples_array_open_loop_mb_recon(
901                                                                     1, // use_top_righ_bottom_left
902                                                                     1, // update_top_neighbor
903                                                                     above_row - 1,
904                                                                     left_col - 1,
905                                                                     recon_buffer,
906                                                                     dst_buffer_stride,
907                                                                     mb_origin_x,
908                                                                     mb_origin_y,
909                                                                     16,
910                                                                     16,
911                                                                     input_picture_ptr->width,
912                                                                     input_picture_ptr->height);
913 
914                 uint8_t ois_intra_mode = best_intra_mode; // ois_mb_results_ptr->intra_mode;
915                 int32_t p_angle = av1_is_directional_mode((PredictionMode)ois_intra_mode)
916                     ? mode_to_angle_map[(PredictionMode)ois_intra_mode]
917                     : 0;
918                 // Edge filter
919                 if (av1_is_directional_mode((PredictionMode)ois_intra_mode) &&
920                     1 /*scs_ptr->seq_header.enable_intra_edge_filter*/) {
921                     filter_intra_edge(NULL,
922                                         ois_intra_mode,
923                                         scs_ptr->seq_header.max_frame_width,
924                                         scs_ptr->seq_header.max_frame_height,
925                                         p_angle,
926                                         mb_origin_x,
927                                         mb_origin_y,
928                                         above_row,
929                                         left_col);
930                 }
931                 // PRED
932                 intra_prediction_open_loop_mb(p_angle,
933                                                 ois_intra_mode,
934                                                 mb_origin_x,
935                                                 mb_origin_y,
936                                                 tx_size,
937                                                 above_row,
938                                                 left_col,
939                                                 dst_buffer,
940                                                 dst_buffer_stride);
941             }
942 
943             svt_aom_subtract_block(16,
944                                     16,
945                                     src_diff,
946                                     16,
947                                     src_mb,
948                                     input_picture_ptr->stride_y,
949                                     dst_buffer,
950                                     dst_buffer_stride);
951             EB_TRANS_COEFF_SHAPE pf_shape = pcs_ptr->tpl_ctrls.tpl_opt_flag ? pcs_ptr->tpl_ctrls.pf_shape : DEFAULT_SHAPE;
952             svt_av1_wht_fwd_txfm(src_diff, 16, coeff, tx_size,pf_shape, 8, 0);
953 
954             uint16_t eob = 0;
955 
956             get_quantize_error(
957                 &mb_plane, coeff, qcoeff, dqcoeff, tx_size, &eob, &recon_error, &sse);
958             int rate_cost = pcs_ptr->tpl_ctrls.tpl_opt_flag ? 0 : rate_estimator(qcoeff, eob, tx_size);
959             // Disable intra prediction
960             disable_intra_pred  = tpl_opt_flag && (pcs_ptr->tpl_ctrls.disable_intra_pred_nref ||
961                 pcs_ptr->tpl_ctrls.disable_intra_pred_nbase);
962             if (!disable_intra_pred || (pcs_ptr->tpl_data.is_used_as_reference_flag))
963                 if (eob) {
964                     av1_inv_transform_recon8bit((int32_t *)dqcoeff,
965                                                 dst_buffer,
966                                                 dst_buffer_stride,
967                                                 dst_buffer,
968                                                 dst_buffer_stride,
969                                                 TX_16X16,
970                                                 DCT_DCT,
971                                                 PLANE_TYPE_Y,
972                                                 eob,
973                                                 0);
974                 }
975 
976             tpl_stats.recrf_dist = recon_error << (TPL_DEP_COST_SCALE_LOG2);
977             tpl_stats.recrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
978             if (best_mode != NEWMV) {
979                 tpl_stats.srcrf_dist = recon_error << (TPL_DEP_COST_SCALE_LOG2);
980                 tpl_stats.srcrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
981             }
982             tpl_stats.recrf_dist = AOMMAX(tpl_stats.srcrf_dist, tpl_stats.recrf_dist);
983             tpl_stats.recrf_rate = AOMMAX(tpl_stats.srcrf_rate, tpl_stats.recrf_rate);
984             if (pcs_ptr->tpl_data.tpl_slice_type != I_SLICE && best_rf_idx != -1) {
985                 tpl_stats.mv            = final_best_mv;
986                 tpl_stats.ref_frame_poc = best_ref_poc;
987             }
988             // Motion flow dependency dispenser.
989             result_model_store(pcs_ptr, &tpl_stats, mb_origin_x, mb_origin_y);
990         }
991         pa_blk_index++;
992     }
993 
994     }
995 
996 }
997 
// Values stored in TplDispResults.input_type and dispatched on by assign_tpl_segments().
// Initial task posted by tpl_mc_flow_dispenser() for a picture: all segment rows are
// reset and the first segment of row 0 is handed out.
#define TPL_TASKS_MDC_INPUT 0
// Feedback task posted by assign_tpl_segments(): start on the row given in
// enc_dec_segment_row.
#define TPL_TASKS_ENCDEC_INPUT 1
// The task already holds a segment index; the next call to assign_tpl_segments()
// updates the right and bottom-left neighbor dependencies of that segment.
#define TPL_TASKS_CONTINUE 2
1001 /*
1002    Assign TPL dispenser segments
1003 */
assign_tpl_segments(EncDecSegments * segmentPtr,uint16_t * segmentInOutIndex,TplDispResults * taskPtr,int32_t frame_idx,EbFifo * srmFifoPtr)1004 EbBool assign_tpl_segments(EncDecSegments *segmentPtr, uint16_t *segmentInOutIndex,
1005                                TplDispResults * taskPtr,
1006     int32_t                          frame_idx, EbFifo *srmFifoPtr) {
1007     EbBool           continue_processing_flag = EB_FALSE;
1008     uint32_t row_segment_index = 0;
1009     uint32_t segment_index;
1010     uint32_t right_segment_index;
1011     uint32_t bottom_left_segment_index;
1012 
1013     int16_t feedback_row_index = -1;
1014 
1015     uint32_t self_assigned = EB_FALSE;
1016 
1017     //static FILE *trace = 0;
1018     //
1019     //if(trace == 0) {
1020     //    trace = fopen("seg-trace.txt","w");
1021     //}
1022 
1023     switch (taskPtr->input_type) {
1024     case TPL_TASKS_MDC_INPUT:
1025 
1026         // The entire picture is provided by the MDC process, so
1027         //   no logic is necessary to clear input dependencies.
1028         for (uint32_t row_index = 0; row_index < segmentPtr->segment_row_count; ++row_index) {
1029             segmentPtr->row_array[row_index].current_seg_index =
1030                 segmentPtr->row_array[row_index].starting_seg_index;
1031         }
1032 
1033 
1034         // Start on Segment 0 immediately
1035         *segmentInOutIndex  = segmentPtr->row_array[0].current_seg_index;
1036         taskPtr->input_type = TPL_TASKS_CONTINUE;
1037         ++segmentPtr->row_array[0].current_seg_index;
1038         continue_processing_flag = EB_TRUE;
1039 
1040         //fprintf(trace, "Start  Pic: %u Seg: %u\n",
1041         //    (unsigned) ((PictureControlSet*) taskPtr->pcs_wrapper_ptr->object_ptr)->picture_number,
1042         //    *segmentInOutIndex);
1043 
1044         break;
1045 
1046     case TPL_TASKS_ENCDEC_INPUT:
1047 
1048         // Setup row_segment_index to release the in_progress token
1049         //row_segment_index = taskPtr->encDecSegmentRowArray[0];
1050 
1051         // Start on the assigned row immediately
1052         *segmentInOutIndex  = segmentPtr->row_array[taskPtr->enc_dec_segment_row].current_seg_index;
1053         taskPtr->input_type = TPL_TASKS_CONTINUE;
1054         ++segmentPtr->row_array[taskPtr->enc_dec_segment_row].current_seg_index;
1055         continue_processing_flag = EB_TRUE;
1056 
1057         //fprintf(trace, "Start  Pic: %u Seg: %u\n",
1058         //    (unsigned) ((PictureControlSet*) taskPtr->pcs_wrapper_ptr->object_ptr)->picture_number,
1059         //    *segmentInOutIndex);
1060 
1061         break;
1062 
1063     case TPL_TASKS_CONTINUE:
1064 
1065         // Update the Dependency List for Right and Bottom Neighbors
1066         segment_index     = *segmentInOutIndex;
1067         row_segment_index = segment_index / segmentPtr->segment_band_count;
1068 
1069         right_segment_index       = segment_index + 1;
1070         bottom_left_segment_index = segment_index + segmentPtr->segment_band_count;
1071 
1072         // Right Neighbor
1073         if (segment_index < segmentPtr->row_array[row_segment_index].ending_seg_index) {
1074             svt_block_on_mutex(segmentPtr->row_array[row_segment_index].assignment_mutex);
1075 
1076             --segmentPtr->dep_map.dependency_map[right_segment_index];
1077 
1078             if (segmentPtr->dep_map.dependency_map[right_segment_index] == 0) {
1079                 *segmentInOutIndex = segmentPtr->row_array[row_segment_index].current_seg_index;
1080                 ++segmentPtr->row_array[row_segment_index].current_seg_index;
1081                 self_assigned            = EB_TRUE;
1082                 continue_processing_flag = EB_TRUE;
1083 
1084                 //fprintf(trace, "Start  Pic: %u Seg: %u\n",
1085                 //    (unsigned) ((PictureControlSet*) taskPtr->pcs_wrapper_ptr->object_ptr)->picture_number,
1086                 //    *segmentInOutIndex);
1087             }
1088 
1089             svt_release_mutex(segmentPtr->row_array[row_segment_index].assignment_mutex);
1090         }
1091 
1092         // Bottom-left Neighbor
1093         if (row_segment_index < segmentPtr->segment_row_count - 1 &&
1094             bottom_left_segment_index >=
1095                 segmentPtr->row_array[row_segment_index + 1].starting_seg_index) {
1096             svt_block_on_mutex(segmentPtr->row_array[row_segment_index + 1].assignment_mutex);
1097 
1098             --segmentPtr->dep_map.dependency_map[bottom_left_segment_index];
1099 
1100             if (segmentPtr->dep_map.dependency_map[bottom_left_segment_index] == 0) {
1101                 if (self_assigned == EB_TRUE)
1102                     feedback_row_index = (int16_t)row_segment_index + 1;
1103                 else {
1104                     *segmentInOutIndex =
1105                         segmentPtr->row_array[row_segment_index + 1].current_seg_index;
1106                     ++segmentPtr->row_array[row_segment_index + 1].current_seg_index;
1107                     continue_processing_flag = EB_TRUE;
1108 
1109                     //fprintf(trace, "Start  Pic: %u Seg: %u\n",
1110                     //    (unsigned) ((PictureControlSet*) taskPtr->pcs_wrapper_ptr->object_ptr)->picture_number,
1111                     //    *segmentInOutIndex);
1112                 }
1113             }
1114             svt_release_mutex(segmentPtr->row_array[row_segment_index + 1].assignment_mutex);
1115         }
1116 
1117         if (feedback_row_index > 0) {
1118 
1119             EbObjectWrapper *out_results_wrapper_ptr;
1120 
1121             svt_get_empty_object(
1122                     srmFifoPtr ,
1123                     &out_results_wrapper_ptr);
1124 
1125             TplDispResults *out_results_ptr = (TplDispResults*)out_results_wrapper_ptr->object_ptr;
1126             out_results_ptr->input_type          = TPL_TASKS_ENCDEC_INPUT;
1127 
1128             out_results_ptr->enc_dec_segment_row = feedback_row_index;
1129             out_results_ptr->tile_group_index = taskPtr->tile_group_index;
1130             out_results_ptr->qIndex = taskPtr->qIndex;
1131 
1132             out_results_ptr->pcs_wrapper_ptr = taskPtr->pcs_wrapper_ptr;
1133             out_results_ptr->pcs_ptr = taskPtr->pcs_ptr;
1134             out_results_ptr->frame_index = frame_idx;
1135             svt_post_full_object(out_results_wrapper_ptr);
1136         }
1137 
1138         break;
1139 
1140     default: break;
1141     }
1142 
1143     return continue_processing_flag;
1144 }
1145 
1146 
1147 
1148 
1149 
/************************************************
* Generate TPL MC Flow Dispenser Based on Lookahead
** LAD Window: sliding window size
************************************************/
1154 
1155 
/**
 * Run the TPL motion-compensated flow pass for one picture of the lookahead window.
 *
 * Derives the qindex used for TPL (optionally shifted per slice type / temporal layer),
 * writes the matching rd multiplier to *base_rdmult, posts one TPL_TASKS_MDC_INPUT task
 * on context_ptr->sbo_output_fifo_ptr, waits on pcs_ptr->tpl_disp_done_semaphore, and
 * finally pads the luma plane of the picture's TPL recon buffer.
 *
 * encode_context_ptr - provides mc_flow_rec_picture_buffer[frame_idx] (recon to pad).
 * scs_ptr            - provides the configured base qp.
 * base_rdmult        - out: rd multiplier for the derived qindex, divided by 6.
 * pcs_ptr            - picture being processed; its tpl_disp_coded_sb_count is reset here.
 * frame_idx          - index of the picture; forwarded in the posted task.
 * context_ptr        - provides the FIFO the TPL task is posted on.
 */
void tpl_mc_flow_dispenser(EncodeContext *encode_context_ptr, SequenceControlSet *scs_ptr,
                           int32_t *base_rdmult, PictureParentControlSet *pcs_ptr,
                           int32_t frame_idx, SourceBasedOperationsContext *context_ptr) {
    EbPictureBufferDesc *recon_picture_ptr =
        encode_context_ptr->mc_flow_rec_picture_buffer[frame_idx];

    int32_t qIndex = quantizer_to_qindex[(uint8_t)scs_ptr->static_config.qp];

    if (pcs_ptr->tpl_ctrls.enable_tpl_qps) {
        // Target-q scaling, indexed by [hierarchical_levels][temporal layer].
        const double layer_q_ratio[7][6] = {
            {1.0, 1.0, 1.0, 1.0, 1.0, 1.0}, // 1L
            {0.6, 1.0, 1.0, 1.0, 1.0, 1.0}, // 2L
            {0.6, 0.8, 1.0, 1.0, 1.0, 1.0}, // 3L
            {0.6, 0.8, 0.9, 1.0, 1.0, 1.0}, // 4L
            {0.35, 0.6, 0.8, 0.9, 1.0, 1.0}, // 5L
            {0.35, 0.6, 0.8, 0.9, 0.95, 1.0} // 6L
        };

        const double base_q = svt_av1_convert_qindex_to_q(qIndex, 8);
        // I slices always aim for a quarter of the base q; others use the table.
        const double ratio = (pcs_ptr->tpl_data.tpl_slice_type == I_SLICE)
            ? 0.25
            : layer_q_ratio[pcs_ptr->hierarchical_levels]
                           [pcs_ptr->tpl_data.tpl_temporal_layer_index];

        qIndex += svt_av1_compute_qdelta(base_q, base_q * ratio, 8);
    }

    // The rd multiplier is computed for 8-bit depth here.
    *base_rdmult = svt_av1_compute_rd_mult_based_on_qindex((AomBitDepth)8, qIndex) / 6;

    // Reset the per-picture count of SBs already processed by TPL.
    pcs_ptr->tpl_disp_coded_sb_count = 0;

    // Build and post the task that starts the TPL dispenser on this picture.
    EbObjectWrapper *task_wrapper;
    svt_get_empty_object(context_ptr->sbo_output_fifo_ptr, &task_wrapper);

    TplDispResults *task   = (TplDispResults *)task_wrapper->object_ptr;
    task->pcs_ptr          = pcs_ptr;
    task->input_type       = TPL_TASKS_MDC_INPUT;
    task->tile_group_index = 0;
    task->frame_index      = frame_idx;
    task->qIndex           = qIndex;

    svt_post_full_object(task_wrapper);

    // Wait here; presumably the semaphore is posted once the whole picture has been
    // processed by the dispenser workers (the poster is not visible in this function).
    svt_block_on_semaphore(pcs_ptr->tpl_disp_done_semaphore);

    // Pad the luma plane of this picture's TPL recon buffer.
    generate_padding(recon_picture_ptr->buffer_y,
                     recon_picture_ptr->stride_y,
                     recon_picture_ptr->width,
                     recon_picture_ptr->height,
                     recon_picture_ptr->origin_x,
                     recon_picture_ptr->origin_y);
}
1234 
1235 
get_overlap_area(int grid_pos_row,int grid_pos_col,int ref_pos_row,int ref_pos_col,int block,int bsize)1236 static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row, int ref_pos_col,
1237                             int block, int /*BLOCK_SIZE*/ bsize) {
1238     int width = 0, height = 0;
1239     int bw = 4 << mi_size_wide_log2[bsize];
1240     int bh = 4 << mi_size_high_log2[bsize];
1241 
1242     switch (block) {
1243     case 0:
1244         width  = grid_pos_col + bw - ref_pos_col;
1245         height = grid_pos_row + bh - ref_pos_row;
1246         break;
1247     case 1:
1248         width  = ref_pos_col + bw - grid_pos_col;
1249         height = grid_pos_row + bh - ref_pos_row;
1250         break;
1251     case 2:
1252         width  = grid_pos_col + bw - ref_pos_col;
1253         height = ref_pos_row + bh - grid_pos_row;
1254         break;
1255     case 3:
1256         width  = ref_pos_col + bw - grid_pos_col;
1257         height = ref_pos_row + bh - grid_pos_row;
1258         break;
1259     default: assert(0);
1260     }
1261 
1262     return width * height;
1263 }
1264 
/*
 * Floor division of ref_pos by bsize_pix (rounds toward negative infinity,
 * unlike C's '/' which truncates toward zero). bsize_pix is expected to be
 * positive.
 */
static int round_floor(int ref_pos, int bsize_pix) {
    if (ref_pos >= 0)
        return ref_pos / bsize_pix;

    // Negative positions: count whole blocks below zero, then step one further.
    const int blocks_below = (-ref_pos - 1) / bsize_pix;
    return -(1 + blocks_below);
}
1274 
delta_rate_cost(int64_t delta_rate,int64_t recrf_dist,int64_t srcrf_dist,int pix_num)1275 static int64_t delta_rate_cost(int64_t delta_rate, int64_t recrf_dist, int64_t srcrf_dist,
1276                                int pix_num) {
1277     double  beta      = (double)srcrf_dist / recrf_dist;
1278     int64_t rate_cost = delta_rate;
1279 
1280     if (srcrf_dist <= 128)
1281         return rate_cost;
1282 
1283     double dr = (double)(delta_rate >> (TPL_DEP_COST_SCALE_LOG2 + AV1_PROB_COST_SHIFT)) / pix_num;
1284 
1285     double log_den = log(beta) / log(2.0) + 2.0 * dr;
1286 
1287     if (log_den > log(10.0) / log(2.0)) {
1288         rate_cost = (int64_t)((log(1.0 / beta) * pix_num) / log(2.0) / 2.0);
1289         rate_cost <<= (TPL_DEP_COST_SCALE_LOG2 + AV1_PROB_COST_SHIFT);
1290         return rate_cost;
1291     }
1292 
1293     double num = pow(2.0, log_den);
1294     double den = num * beta + (1 - beta) * beta;
1295 
1296     rate_cost = (int64_t)((pix_num * log(num / den)) / log(2.0) / 2.0);
1297 
1298     rate_cost <<= (TPL_DEP_COST_SCALE_LOG2 + AV1_PROB_COST_SHIFT);
1299 
1300     return rate_cost;
1301 }
/************************************************
* Generate TPL MC Flow Synthesizer
************************************************/
1305 
1306 
tpl_model_update_b(PictureParentControlSet * ref_pcs_ptr,PictureParentControlSet * pcs_ptr,TplStats * tpl_stats_ptr,int mi_row,int mi_col,const int bsize)1307 static AOM_INLINE void tpl_model_update_b(PictureParentControlSet *ref_pcs_ptr, PictureParentControlSet *pcs_ptr,
1308     TplStats *tpl_stats_ptr,
1309     int mi_row, int mi_col,
1310     const int/*BLOCK_SIZE*/ bsize) {
1311     Av1Common *ref_cm = ref_pcs_ptr->av1_cm;
1312     TplStats * ref_tpl_stats_ptr;
1313 
1314     const FULLPEL_MV full_mv     = get_fullmv_from_mv(&tpl_stats_ptr->mv);
1315     const int        ref_pos_row = mi_row * MI_SIZE + full_mv.row;
1316     const int        ref_pos_col = mi_col * MI_SIZE + full_mv.col;
1317 
1318     const int bw         = 4 << mi_size_wide_log2[bsize];
1319     const int bh         = 4 << mi_size_high_log2[bsize];
1320     const int mi_height  = mi_size_high[bsize];
1321     const int mi_width   = mi_size_wide[bsize];
1322     const int pix_num    = bw * bh;
1323     const int shift      = pcs_ptr->is_720p_or_larger ? 2 : 1;
1324     const int mi_cols_sr = ((ref_pcs_ptr->aligned_width + 15) / 16) << 2;
1325 
1326     // top-left on grid block location in pixel
1327     int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh;
1328     int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw;
1329     int block;
1330 
1331     int64_t cur_dep_dist = tpl_stats_ptr->recrf_dist - tpl_stats_ptr->srcrf_dist;
1332     int64_t mc_dep_dist  = (int64_t)(
1333         tpl_stats_ptr->mc_dep_dist *
1334         ((double)(tpl_stats_ptr->recrf_dist - tpl_stats_ptr->srcrf_dist) /
1335          tpl_stats_ptr->recrf_dist));
1336     int64_t delta_rate  = tpl_stats_ptr->recrf_rate - tpl_stats_ptr->srcrf_rate;
1337     int64_t mc_dep_rate = pcs_ptr->tpl_ctrls.tpl_opt_flag ? 0
1338 
1339         : delta_rate_cost(tpl_stats_ptr->mc_dep_rate,
1340                           tpl_stats_ptr->recrf_dist,
1341                           tpl_stats_ptr->srcrf_dist,
1342                           pix_num);
1343 
1344     for (block = 0; block < 4; ++block) {
1345         int grid_pos_row = grid_pos_row_base + bh * (block >> 1);
1346         int grid_pos_col = grid_pos_col_base + bw * (block & 0x01);
1347 
1348         if (grid_pos_row >= 0 && grid_pos_row < ref_cm->mi_rows * MI_SIZE && grid_pos_col >= 0 &&
1349             grid_pos_col < ref_cm->mi_cols * MI_SIZE) {
1350             int overlap_area = get_overlap_area(
1351                 grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize);
1352             int       ref_mi_row = round_floor(grid_pos_row, bh) * mi_height;
1353             int       ref_mi_col = round_floor(grid_pos_col, bw) * mi_width;
1354             const int step       = 1 << (pcs_ptr->is_720p_or_larger ? 2 : 1);
1355 
1356             for (int idy = 0; idy < mi_height; idy += step) {
1357                 for (int idx = 0; idx < mi_width; idx += step) {
1358                     ref_tpl_stats_ptr = ref_pcs_ptr->tpl_stats[((ref_mi_row + idy) >> shift) *
1359                                                                    (mi_cols_sr >> shift) +
1360                                                                ((ref_mi_col + idx) >> shift)];
1361                     ref_tpl_stats_ptr->mc_dep_dist += ((cur_dep_dist + mc_dep_dist) *
1362                                                        overlap_area) /
1363                         pix_num;
1364                     ref_tpl_stats_ptr->mc_dep_rate += ((delta_rate + mc_dep_rate) * overlap_area) /
1365                         pix_num;
1366                     assert(overlap_area >= 0);
1367                 }
1368             }
1369         }
1370     }
1371 }
1372 
/************************************************
* Generate TPL MC Flow Synthesizer
************************************************/
1376 
1377 
tpl_model_update(PictureParentControlSet * pcs_array[MAX_TPL_LA_SW],int32_t frame_idx,int mi_row,int mi_col,const int bsize,uint8_t frames_in_sw)1378 static AOM_INLINE void tpl_model_update(
1379     PictureParentControlSet     *pcs_array[MAX_TPL_LA_SW],
1380     int32_t frame_idx, int mi_row, int mi_col,
1381     const int/*BLOCK_SIZE*/ bsize, uint8_t frames_in_sw) {
1382     const int                mi_height  = mi_size_high[bsize];
1383     const int                mi_width   = mi_size_wide[bsize];
1384     PictureParentControlSet  *pcs_ptr = pcs_array[frame_idx];
1385     const int /*BLOCK_SIZE*/ block_size = pcs_ptr->is_720p_or_larger ? BLOCK_16X16 : BLOCK_8X8;
1386     const int                step       = 1 << (pcs_ptr->is_720p_or_larger ? 2 : 1);
1387     const int                shift      = pcs_ptr->is_720p_or_larger ? 2 : 1;
1388     const int                mi_cols_sr = ((pcs_ptr->aligned_width + 15) / 16) << 2;
1389     int                      i          = 0;
1390 
1391     for (int idy = 0; idy < mi_height; idy += step) {
1392         for (int idx = 0; idx < mi_width; idx += step) {
1393             TplStats *tpl_stats_ptr =
1394                 pcs_ptr->tpl_stats[(((mi_row + idy) >> shift) * (mi_cols_sr >> shift)) +
1395                                    ((mi_col + idx) >> shift)];
1396 
1397             while (i < frames_in_sw && pcs_array[i]->picture_number != tpl_stats_ptr->ref_frame_poc)
1398                 i++;
1399             if (i < frames_in_sw)
1400                 tpl_model_update_b(
1401                     pcs_array[i], pcs_ptr, tpl_stats_ptr, mi_row + idy, mi_col + idx, block_size);
1402         }
1403     }
1404 }
1405 
1406 
1407 
/************************************************
* Generate TPL MC Flow Synthesizer Based on Lookahead
** LAD Window: sliding window size
************************************************/
1412 
1413 
/*
 * Synthesizer pass for one picture of the TPL window: visits the picture in
 * 16x16 blocks (raster order over cm->mi_rows x cm->mi_cols) and calls
 * tpl_model_update() on each of them.
 *
 * pcs_array    : pictures of the TPL window
 * frame_idx    : index in pcs_array of the picture to process
 * frames_in_sw : number of valid entries in pcs_array
 */
void tpl_mc_flow_synthesizer(
    PictureParentControlSet         *pcs_array[MAX_TPL_LA_SW],
    int32_t                          frame_idx,
    uint8_t                          frames_in_sw)
{
    const Av1Common         *frame_cm    = pcs_array[frame_idx]->av1_cm;
    const int /*BLOCK_SIZE*/ synth_bsize = BLOCK_16X16;
    const int                row_stride  = mi_size_high[synth_bsize];
    const int                col_stride  = mi_size_wide[synth_bsize];

    int mi_row = 0;
    while (mi_row < frame_cm->mi_rows) {
        int mi_col = 0;
        while (mi_col < frame_cm->mi_cols) {
            tpl_model_update(pcs_array, frame_idx, mi_row, mi_col, synth_bsize, frames_in_sw);
            mi_col += col_stride;
        }
        mi_row += row_stride;
    }
}
1431 
1432 
/*
 * Derives the picture-level r0 ratio and the per-superblock beta factors from
 * the accumulated TPL stats of pcs_ptr.
 *
 * Picture level: sums (recrf_dist << RDDIV_BITS) over all stats entries as the
 * intra cost, and the same plus RDCOST(base_rdmult, mc_dep_rate, mc_dep_dist)
 * as the dependency cost. When the dependency cost is non-zero, r0 is set to
 * intra / dependency and tpl_is_valid to 1; otherwise tpl_is_valid is cleared
 * and r0 is left untouched. generate_lambda_scaling_factor() is then called
 * with the dependency cost.
 *
 * Superblock level: the same two sums are taken over the stats entries covered
 * by each SB; beta = r0 / (intra / dependency) when both sums are positive and
 * 1.0 otherwise. The result is stored in tpl_beta[] in SB raster order.
 */
static void generate_r0beta(PictureParentControlSet *pcs_ptr) {
    Av1Common          *cm      = pcs_ptr->av1_cm;
    SequenceControlSet *scs_ptr = pcs_ptr->scs_ptr;

    // Stats entries are stored every (1 << shift) mi units.
    const int shift        = pcs_ptr->is_720p_or_larger ? 2 : 1;
    const int step         = 1 << (pcs_ptr->is_720p_or_larger ? 2 : 1);
    const int mi_cols_sr   = ((pcs_ptr->aligned_width + 15) / 16) << 2;
    const int stats_stride = mi_cols_sr >> shift;

    int64_t intra_cost_base  = 0;
    int64_t mc_dep_cost_base = 0;
    for (int row = 0; row < cm->mi_rows; row += step) {
        const int row_base = (row >> shift) * stats_stride;
        for (int col = 0; col < mi_cols_sr; col += step) {
            TplStats     *stats        = pcs_ptr->tpl_stats[row_base + (col >> shift)];
            const int64_t mc_dep_delta = RDCOST(
                pcs_ptr->base_rdmult, stats->mc_dep_rate, stats->mc_dep_dist);
            intra_cost_base += (stats->recrf_dist << RDDIV_BITS);
            mc_dep_cost_base += (stats->recrf_dist << RDDIV_BITS) + mc_dep_delta;
        }
    }

    if (mc_dep_cost_base == 0) {
        pcs_ptr->tpl_is_valid = 0;
    } else {
        pcs_ptr->r0           = (double)intra_cost_base / mc_dep_cost_base;
        pcs_ptr->tpl_is_valid = 1;
    }

#if DEBUG_TPL
    SVT_LOG("generate_r0beta ------> poc %ld\t%.0f\t%.0f \t%.5f base_rdmult=%d\n",
            pcs_ptr->picture_number,
            (double)intra_cost_base,
            (double)mc_dep_cost_base,
            pcs_ptr->r0,
            pcs_ptr->base_rdmult);
#endif
    generate_lambda_scaling_factor(pcs_ptr, mc_dep_cost_base);

    // Superblock / macroblock geometry of the picture.
    const uint32_t sb_sz            = scs_ptr->seq_header.sb_size == BLOCK_128X128 ? 128 : 64;
    const uint32_t picture_sb_width = (uint32_t)((scs_ptr->seq_header.max_frame_width + sb_sz - 1) /
                                                 sb_sz);
    const uint32_t picture_sb_height = (uint32_t)(
        (scs_ptr->seq_header.max_frame_height + sb_sz - 1) / sb_sz);
    const uint32_t picture_width_in_mb  = (scs_ptr->seq_header.max_frame_width + 16 - 1) / 16;
    const uint32_t picture_height_in_mb = (scs_ptr->seq_header.max_frame_height + 16 - 1) / 16;

    // log2 of the stats block size in pixels (8 or 16) and the shift that maps
    // a stats-block index onto a 16x16 macroblock index.
    const int      blk_log2    = 3 + pcs_ptr->is_720p_or_larger;
    const int      blk_to_mb   = 1 - pcs_ptr->is_720p_or_larger;
    const uint32_t blks_per_sb = sb_sz >> blk_log2;

    for (uint32_t sb_y = 0; sb_y < picture_sb_height; ++sb_y) {
        const uint32_t blky_base = (sb_y * sb_sz) >> blk_log2;
        for (uint32_t sb_x = 0; sb_x < picture_sb_width; ++sb_x) {
            const uint32_t blkx_base   = (sb_x * sb_sz) >> blk_log2;
            int64_t        intra_cost  = 0;
            int64_t        mc_dep_cost = 0;

            for (uint32_t dy = 0; dy < blks_per_sb; dy++) {
                const uint32_t blky = blky_base + dy;
                // Rows below the picture contribute nothing.
                if ((blky >> blk_to_mb) >= picture_height_in_mb)
                    continue;
                for (uint32_t dx = 0; dx < blks_per_sb; dx++) {
                    const uint32_t blkx = blkx_base + dx;
                    // Columns right of the picture contribute nothing.
                    if ((blkx >> blk_to_mb) >= picture_width_in_mb)
                        continue;
                    TplStats     *stats        = pcs_ptr->tpl_stats[blky * stats_stride + blkx];
                    const int64_t mc_dep_delta = RDCOST(
                        pcs_ptr->base_rdmult, stats->mc_dep_rate, stats->mc_dep_dist);
                    intra_cost += (stats->recrf_dist << RDDIV_BITS);
                    mc_dep_cost += (stats->recrf_dist << RDDIV_BITS) + mc_dep_delta;
                }
            }

            double beta = 1.0;
            if (mc_dep_cost > 0 && intra_cost > 0) {
                const double rk = (double)intra_cost / mc_dep_cost;
                beta            = (pcs_ptr->r0 / rk);
                assert(beta > 0.0);
            }
            pcs_ptr->tpl_beta[sb_y * picture_sb_width + sb_x] = beta;
        }
    }
}
1514 /************************************************
1515 * Allocate and initialize buffers needed for tpl
1516 ************************************************/
init_tpl_buffers(EncodeContext * encode_context_ptr,PictureParentControlSet * pcs_ptr)1517 EbErrorType init_tpl_buffers(
1518     EncodeContext                   *encode_context_ptr,
1519     PictureParentControlSet         *pcs_ptr){
1520     int32_t frames_in_sw = MIN(MAX_TPL_LA_SW, pcs_ptr->tpl_group_size);
1521     int32_t frame_idx;
1522 
1523     for (frame_idx = 0; frame_idx < MAX_TPL_LA_SW; frame_idx++) {
1524         encode_context_ptr->poc_map_idx[frame_idx]                = -1;
1525         encode_context_ptr->mc_flow_rec_picture_buffer[frame_idx] = NULL;
1526     }
1527     EbPictureBufferDescInitData picture_buffer_desc_init_data;
1528     picture_buffer_desc_init_data.max_width          = pcs_ptr->enhanced_picture_ptr->max_width;
1529     picture_buffer_desc_init_data.max_height         = pcs_ptr->enhanced_picture_ptr->max_height;
1530     picture_buffer_desc_init_data.bit_depth          = pcs_ptr->enhanced_picture_ptr->bit_depth;
1531     picture_buffer_desc_init_data.color_format       = pcs_ptr->enhanced_picture_ptr->color_format;
1532     picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_Y_FLAG;
1533     picture_buffer_desc_init_data.left_padding       = TPL_PADX;
1534     picture_buffer_desc_init_data.right_padding      = TPL_PADX;
1535     picture_buffer_desc_init_data.top_padding        = TPL_PADY;
1536     picture_buffer_desc_init_data.bot_padding        = TPL_PADY;
1537     picture_buffer_desc_init_data.split_mode         = EB_FALSE;
1538 
1539     EB_NEW(encode_context_ptr->mc_flow_rec_picture_buffer_noref,
1540            svt_picture_buffer_desc_ctor,
1541            (EbPtr)&picture_buffer_desc_init_data);
1542 
1543     for (frame_idx = 0; frame_idx < frames_in_sw; frame_idx++) {
1544         if (pcs_ptr->tpl_valid_pic[frame_idx]) {
1545             EB_NEW(encode_context_ptr->mc_flow_rec_picture_buffer[frame_idx],
1546                    svt_picture_buffer_desc_ctor,
1547                    (EbPtr)&picture_buffer_desc_init_data);
1548         } else {
1549             encode_context_ptr->mc_flow_rec_picture_buffer[frame_idx] =
1550                 encode_context_ptr->mc_flow_rec_picture_buffer_noref;
1551         }
1552     }
1553     return EB_ErrorNone;
1554 }
1555 
1556 
1557 
1558 
/************************************************
* Initialize the TPL dispenser segment control (tpl_disp_segment_ctrl)
************************************************/
/*
 * Initialises the tile-group info and the TPL dispenser segment control of the
 * first frames_in_sw pictures in pcs_array.
 *
 * The tile-group layout and segment counts are derived from scs_ptr and
 * pcs_ptr only, so they are the same for every picture; each picture then gets
 * its tile_group_info[] entries filled and enc_dec_segments_init() called on
 * the matching tpl_disp_segment_ctrl[] entry.
 *
 * Only valid when a single tile is used (TPL segments combined with tiles are
 * not working): the SB end of every tile group is pinned to the picture
 * width/height in SBs rather than to the start of the next tile group.
 */
void init_tpl_segments(
    SequenceControlSet              *scs_ptr,
    PictureParentControlSet         *pcs_ptr,
    PictureParentControlSet        **pcs_array,
    int32_t                         frames_in_sw) {

    // Nothing to set up for an empty window.
    if (frames_in_sw <= 0)
        return;

    const int tile_cols = pcs_ptr->av1_cm->tiles_info.tile_cols;
    const int tile_rows = pcs_ptr->av1_cm->tiles_info.tile_rows;
    const uint8_t tile_group_cols = MIN(
        tile_cols,
        scs_ptr->tile_group_col_count_array[pcs_ptr->temporal_layer_index]);
    const uint8_t tile_group_rows = MIN(
        tile_rows,
        scs_ptr->tile_group_row_count_array[pcs_ptr->temporal_layer_index]);

    // Picture size in SBs (rounded up).
    const uint16_t pic_width_in_sb =
        (pcs_ptr->aligned_width + scs_ptr->sb_sz - 1) / scs_ptr->sb_sz;
    const uint16_t pic_height_in_sb =
        (pcs_ptr->aligned_height + scs_ptr->sb_sz - 1) / scs_ptr->sb_sz;

    // Segment counts, capped by the SBs available per tile group when there
    // is more than one tile group.
    uint32_t seg_col_cnt = scs_ptr->tpl_segment_col_count_array;
    uint32_t seg_row_cnt = scs_ptr->tpl_segment_row_count_array;
    if (tile_group_cols * tile_group_rows > 1) {
        seg_col_cnt = MIN(seg_col_cnt,
            (uint8_t)(pic_width_in_sb / tile_group_cols));
        seg_row_cnt = MIN(
            seg_row_cnt,
            (uint8_t)(pic_height_in_sb / tile_group_rows));
    }

    const int sb_size_log2 = scs_ptr->seq_header.sb_size_log2;

    // First tile index of every tile group, plus one trailing end marker.
    uint8_t group_first_tile_col[1024];
    uint8_t group_first_tile_row[1024];
    for (uint8_t c = 0; c <= tile_group_cols; c++) {
        group_first_tile_col[c] = c * tile_cols / tile_group_cols;
    }
    for (uint8_t r = 0; r <= tile_group_rows; r++) {
        group_first_tile_row[r] = r * tile_rows / tile_group_rows;
    }

    for (int32_t frame_idx = 0; frame_idx < frames_in_sw; frame_idx++) {
        PictureParentControlSet *frame_pcs = pcs_array[frame_idx];

        for (uint8_t r = 0; r < tile_group_rows; r++) {
            const uint16_t first_tile_row = group_first_tile_row[r];
            const uint16_t end_tile_row   = group_first_tile_row[r + 1];

            for (uint8_t c = 0; c < tile_group_cols; c++) {
                const uint16_t tile_group_idx = r * tile_group_cols + c;
                const uint16_t first_tile_col = group_first_tile_col[c];
                const uint16_t end_tile_col   = group_first_tile_col[c + 1];

                TileGroupInfo *tg_info = &frame_pcs->tile_group_info[tile_group_idx];

                // Tile span of the group.
                tg_info->tile_group_tile_start_x = first_tile_col;
                tg_info->tile_group_tile_end_x   = end_tile_col;
                tg_info->tile_group_tile_start_y = first_tile_row;
                tg_info->tile_group_tile_end_y   = end_tile_row;

                // SB span of the group: starts at its first tile, ends at the
                // picture edge.
                tg_info->tile_group_sb_start_x =
                    pcs_ptr->av1_cm->tiles_info.tile_col_start_mi[first_tile_col] >>
                    sb_size_log2;
                tg_info->tile_group_sb_start_y =
                    pcs_ptr->av1_cm->tiles_info.tile_row_start_mi[first_tile_row] >>
                    sb_size_log2;
                tg_info->tile_group_sb_end_x = pic_width_in_sb;
                tg_info->tile_group_sb_end_y = pic_height_in_sb;

                // Width/height of the tile group in SBs.
                tg_info->tile_group_height_in_sb =
                    tg_info->tile_group_sb_end_y - tg_info->tile_group_sb_start_y;
                tg_info->tile_group_width_in_sb =
                    tg_info->tile_group_sb_end_x - tg_info->tile_group_sb_start_x;

                enc_dec_segments_init(
                    frame_pcs->tpl_disp_segment_ctrl[tile_group_idx],
                    seg_col_cnt,
                    seg_row_cnt,
                    tg_info->tile_group_width_in_sb,
                    tg_info->tile_group_height_in_sb);
            }
        }
    }
}
1665 
1666 
/************************************************
* Generate TPL MC Flow Based on frames in the tpl group
************************************************/
tpl_mc_flow(EncodeContext * encode_context_ptr,SequenceControlSet * scs_ptr,PictureParentControlSet * pcs_ptr,SourceBasedOperationsContext * context_ptr)1670 EbErrorType tpl_mc_flow(EncodeContext *encode_context_ptr, SequenceControlSet *scs_ptr,
1671                         PictureParentControlSet *pcs_ptr,  SourceBasedOperationsContext    *context_ptr) {
1672 
1673     int32_t  frames_in_sw = MIN(MAX_TPL_LA_SW, pcs_ptr->tpl_group_size);
1674     int32_t  frame_idx;
1675     uint32_t shift                = pcs_ptr->is_720p_or_larger ? 0 : 1;
1676     uint32_t picture_width_in_mb  = (pcs_ptr->enhanced_picture_ptr->width + 16 - 1) / 16;
1677     uint32_t picture_height_in_mb = (pcs_ptr->enhanced_picture_ptr->height + 16 - 1) / 16;
1678 
1679     //wait for PA ME to be done.
1680     for (uint32_t i = 1; i < pcs_ptr->tpl_group_size; i++) {
1681         svt_wait_cond_var(&pcs_ptr->tpl_group[i]->me_ready, 0);
1682     }
1683     pcs_ptr->tpl_is_valid = 0;
1684     init_tpl_buffers(encode_context_ptr, pcs_ptr);
1685 
1686     if (pcs_ptr->tpl_group[0]->tpl_data.tpl_temporal_layer_index == 0) {
1687 
1688 
1689         // no Tiles path
1690         if (scs_ptr->static_config.tile_rows == 0 && scs_ptr->static_config.tile_columns == 0 )
1691             init_tpl_segments(
1692                 scs_ptr,
1693                 pcs_ptr,
1694                 pcs_ptr->tpl_group,
1695                 frames_in_sw) ;
1696 
1697 
1698 
1699         uint8_t tpl_on;
1700         encode_context_ptr->poc_map_idx[0] = pcs_ptr->tpl_group[0]->picture_number;
1701         for (frame_idx = 0; frame_idx < frames_in_sw; frame_idx++) {
1702             encode_context_ptr->poc_map_idx[frame_idx] = pcs_ptr->tpl_group[frame_idx]->picture_number;
1703             for (uint32_t blky = 0; blky < (picture_height_in_mb << shift); blky++) {
1704                 memset(pcs_ptr->tpl_group[frame_idx]->tpl_stats[blky * (picture_width_in_mb << shift)],
1705                         0,
1706                         (picture_width_in_mb << shift) * sizeof(TplStats));
1707             }
1708             if(scs_ptr->lad_mg)
1709                 tpl_on = pcs_ptr->tpl_valid_pic[frame_idx];
1710             else {
1711                 tpl_on = !(pcs_ptr->tpl_group[0]->tpl_ctrls.disable_tpl_nref);
1712                 tpl_on = (pcs_ptr->tpl_group[0]->slice_type == I_SLICE) ? 1 : tpl_on;
1713                 if (tpl_on == 0) {
1714                     tpl_on = pcs_ptr->tpl_group[frame_idx]->tpl_data.is_used_as_reference_flag ? 1 :
1715                         (ABS((int64_t)pcs_ptr->tpl_group[0]->picture_number -
1716                         (int64_t)pcs_ptr->tpl_group[frame_idx]->picture_number)
1717                         <= pcs_ptr->tpl_group[0]->tpl_ctrls.disable_tpl_pic_dist) ? 1 : tpl_on;
1718                 }
1719             }
1720             if (tpl_on)
1721                 tpl_mc_flow_dispenser(encode_context_ptr, scs_ptr, &pcs_ptr->base_rdmult, pcs_ptr->tpl_group[frame_idx], frame_idx,context_ptr);
1722         }
1723 
1724         // synthesizer
1725         for (frame_idx = frames_in_sw - 1; frame_idx >= 0; frame_idx--) {
1726             if(scs_ptr->lad_mg)
1727                 tpl_on = pcs_ptr->tpl_valid_pic[frame_idx];
1728             else {
1729                 tpl_on = !(pcs_ptr->tpl_group[0]->tpl_ctrls.disable_tpl_nref);
1730                 tpl_on = (pcs_ptr->tpl_group[0]->slice_type == I_SLICE) ? 1 : tpl_on;
1731                 if (tpl_on == 0) {
1732                     tpl_on = pcs_ptr->tpl_group[frame_idx]->tpl_data.is_used_as_reference_flag ? 1 :
1733                         (ABS((int64_t)pcs_ptr->tpl_group[0]->picture_number -
1734                         (int64_t)pcs_ptr->tpl_group[frame_idx]->picture_number)
1735                             <= pcs_ptr->tpl_group[0]->tpl_ctrls.disable_tpl_pic_dist) ? 1 : tpl_on;
1736                 }
1737             }
1738             if (tpl_on)
1739                 tpl_mc_flow_synthesizer(pcs_ptr->tpl_group, frame_idx, frames_in_sw);
1740         }
1741 
1742         // generate tpl stats
1743         generate_r0beta(pcs_ptr);
1744 #if DEBUG_TPL
1745         SVT_LOG("LOG displayorder:%ld\n",
1746             pcs_array[0]->picture_number);
1747         for (frame_idx = 0; frame_idx < frames_in_sw; frame_idx++)
1748         {
1749             PictureParentControlSet *pcs_ptr_tmp = pcs_array[frame_idx];
1750             Av1Common *cm = pcs_ptr->av1_cm;
1751             SequenceControlSet *scs_ptr = pcs_ptr_tmp->scs_ptr;
1752             int64_t intra_cost_base = 0;
1753             int64_t mc_dep_cost_base = 0;
1754             const int step = 1 << (pcs_ptr_tmp->is_720p_or_larger ? 2 : 1);
1755             const int mi_cols_sr = ((pcs_ptr_tmp->aligned_width + 15) / 16) << 2;
1756             const int shift = pcs_ptr_tmp->is_720p_or_larger ? 2 : 1;
1757 
1758             for (int row = 0; row < cm->mi_rows; row += step) {
1759                 for (int col = 0; col < mi_cols_sr; col += step) {
1760                     TplStats *tpl_stats_ptr = pcs_ptr_tmp->tpl_stats[(row >> shift) * (mi_cols_sr >> shift) + (col >> shift)];
1761                     int64_t mc_dep_delta =
1762                         RDCOST(pcs_ptr->base_rdmult, tpl_stats_ptr->mc_dep_rate, tpl_stats_ptr->mc_dep_dist);
1763                     intra_cost_base += (tpl_stats_ptr->recrf_dist << RDDIV_BITS);
1764                     mc_dep_cost_base += (tpl_stats_ptr->recrf_dist << RDDIV_BITS) + mc_dep_delta;
1765                 }
1766             }
1767 
1768             SVT_LOG("After mc_flow_synthesizer:\tframe_indx:%d\tdisplayorder:%ld\tIntra:%lld\tmc_dep:%lld rdmult:%i\n",
1769                 frame_idx, pcs_ptr_tmp->picture_number, intra_cost_base, mc_dep_cost_base, pcs_ptr->base_rdmult);
1770         }
1771 #endif
1772 
1773 
1774 
1775     }
1776 
1777     for (frame_idx = 0; frame_idx < frames_in_sw; frame_idx++) {
1778         if (encode_context_ptr->mc_flow_rec_picture_buffer[frame_idx] &&
1779             encode_context_ptr->mc_flow_rec_picture_buffer[frame_idx] !=
1780                 encode_context_ptr->mc_flow_rec_picture_buffer_noref)
1781             EB_DELETE(encode_context_ptr->mc_flow_rec_picture_buffer[frame_idx]);
1782     }
1783     EB_DELETE(encode_context_ptr->mc_flow_rec_picture_buffer_noref);
1784 
1785     for (uint32_t i = 0; i < pcs_ptr->tpl_group_size; i++) {
1786         if (pcs_ptr->tpl_group[i]->slice_type == P_SLICE) {
1787             if (pcs_ptr->tpl_group[i]->ext_mg_id == pcs_ptr->ext_mg_id + 1)
1788                 release_pa_reference_objects(scs_ptr, pcs_ptr->tpl_group[i]);
1789         }
1790         else {
1791             if (pcs_ptr->tpl_group[i]->ext_mg_id == pcs_ptr->ext_mg_id)
1792                 release_pa_reference_objects(scs_ptr, pcs_ptr->tpl_group[i]);
1793         }
1794         if (pcs_ptr->tpl_group[i]->non_tf_input)
1795             EB_DELETE(pcs_ptr->tpl_group[i]->non_tf_input);
1796     }
1797 
1798     return EB_ErrorNone;
1799 }
1800 
1801 
1802 /*
1803    TPL dispenser kernel
1804    process one picture of TPL group
1805 */
1806 
1807 
tpl_disp_kernel(void * input_ptr)1808 void *tpl_disp_kernel(void *input_ptr) {
1809     EbThreadContext *             thread_context_ptr = (EbThreadContext *)input_ptr;
1810     TplDispenserContext *context_ptr =
1811         (TplDispenserContext *)thread_context_ptr->priv;
1812     EbObjectWrapper *          in_results_wrapper_ptr;
1813     TplDispResults *in_results_ptr;
1814     for (;;) {
1815         // Get Input Full Object
1816         EB_GET_FULL_OBJECT(context_ptr->tpl_disp_input_fifo_ptr,
1817                            &in_results_wrapper_ptr);
1818 
1819         in_results_ptr = (TplDispResults *)in_results_wrapper_ptr->object_ptr;
1820 
1821         PictureParentControlSet* pcs_ptr = in_results_ptr->pcs_ptr;
1822 
1823         SequenceControlSet* scs_ptr = (SequenceControlSet *)pcs_ptr->scs_ptr;
1824 
1825         int32_t frame_idx =in_results_ptr->frame_index;
1826         context_ptr->coded_sb_count   = 0;
1827 
1828         uint16_t tile_group_width_in_sb = pcs_ptr->tile_group_info[0/*context_ptr->tile_group_index*/] //  1 tile
1829                                               .tile_group_width_in_sb;
1830         EncDecSegments *segments_ptr;
1831 
1832         segments_ptr = pcs_ptr->tpl_disp_segment_ctrl[0/*context_ptr->tile_group_index*/]; //  1 tile
1833     // Segments
1834     uint16_t        segment_index;
1835 
1836     uint8_t sb_sz      = (uint8_t)scs_ptr->sb_sz ;
1837     uint8_t sb_size_log2 = (uint8_t)svt_log2f(sb_sz);
1838     uint32_t pic_width_in_sb = (pcs_ptr->aligned_width + sb_sz - 1) >> sb_size_log2;
1839 
1840     segment_index = 0;
1841     // no Tiles path
1842     if (scs_ptr->static_config.tile_rows == 0 && scs_ptr->static_config.tile_columns == 0 ){
1843         // segments loop
1844         while (
1845             assign_tpl_segments(
1846                 segments_ptr,
1847                 &segment_index,
1848                 in_results_ptr,
1849                 frame_idx,
1850                 context_ptr->tpl_disp_fb_fifo_ptr)
1851             == EB_TRUE) {
1852 
1853             uint32_t        x_sb_start_index;
1854             uint32_t        y_sb_start_index;
1855             uint32_t        sb_start_index;
1856             uint32_t        sb_segment_count;
1857             uint32_t        sb_segment_index;
1858             uint32_t        segment_row_index;
1859             uint32_t        segment_band_index;
1860             uint32_t        segment_band_size;
1861             // SB Loop variables
1862             uint32_t        x_sb_index;
1863             uint32_t        y_sb_index;
1864 
1865             x_sb_start_index = segments_ptr->x_start_array[segment_index];
1866             y_sb_start_index = segments_ptr->y_start_array[segment_index];
1867             sb_start_index = y_sb_start_index * tile_group_width_in_sb + x_sb_start_index;
1868             sb_segment_count = segments_ptr->valid_sb_count_array[segment_index];
1869 
1870             segment_row_index = segment_index / segments_ptr->segment_band_count;
1871             segment_band_index =
1872                 segment_index - segment_row_index * segments_ptr->segment_band_count;
1873             segment_band_size = (segments_ptr->sb_band_count * (segment_band_index + 1) +
1874                 segments_ptr->segment_band_count - 1) /
1875                 segments_ptr->segment_band_count;
1876 
1877 
1878             for (y_sb_index = y_sb_start_index, sb_segment_index = sb_start_index;
1879                 sb_segment_index < sb_start_index + sb_segment_count;
1880                 ++y_sb_index) {
1881                 for (x_sb_index = x_sb_start_index;
1882                     x_sb_index < tile_group_width_in_sb &&
1883                     (x_sb_index + y_sb_index < segment_band_size) &&
1884                     sb_segment_index < sb_start_index + sb_segment_count;
1885                     ++x_sb_index, ++sb_segment_index) {
1886                     uint16_t tile_group_y_sb_start =
1887                         pcs_ptr->tile_group_info[0/*context_ptr->tile_group_index*/] //  1 tile
1888                         .tile_group_sb_start_y;
1889                     uint16_t tile_group_x_sb_start =
1890                         pcs_ptr->tile_group_info[0/*context_ptr->tile_group_index*/] //  1 tile
1891                         .tile_group_sb_start_x;
1892 
1893                     context_ptr->sb_index = (uint16_t)((y_sb_index + tile_group_y_sb_start) * pic_width_in_sb +
1894                         x_sb_index + tile_group_x_sb_start);
1895 
1896                     // TPL dispenser per SB (64)
1897                     tpl_mc_flow_dispenser_sb(
1898                         pcs_ptr->scs_ptr->encode_context_ptr,
1899                         scs_ptr,
1900                         pcs_ptr,
1901                         frame_idx,
1902                         context_ptr->sb_index,
1903                         in_results_ptr->qIndex);
1904 
1905                     context_ptr->coded_sb_count++;
1906 
1907                 }
1908 
1909                 x_sb_start_index = (x_sb_start_index > 0) ? x_sb_start_index - 1 : 0;
1910             }
1911         }
1912 
1913         svt_block_on_mutex(pcs_ptr->tpl_disp_mutex);
1914         pcs_ptr->tpl_disp_coded_sb_count += (uint32_t)context_ptr->coded_sb_count;
1915         EbBool last_sb_flag = (pcs_ptr->sb_total_count == pcs_ptr->tpl_disp_coded_sb_count);
1916 
1917         svt_release_mutex(pcs_ptr->tpl_disp_mutex);
1918         if (last_sb_flag)
1919             svt_post_semaphore(pcs_ptr->tpl_disp_done_semaphore);
1920     }
1921     else {
1922         // Tiles path does not suupport segments
1923         for (uint32_t sb_index = 0; sb_index < pcs_ptr->sb_total_count; ++sb_index) {
1924 
1925             tpl_mc_flow_dispenser_sb(
1926                 pcs_ptr->scs_ptr->encode_context_ptr,
1927                 scs_ptr,
1928                 pcs_ptr,
1929                 frame_idx,
1930                 sb_index,
1931                 in_results_ptr->qIndex);
1932         }
1933         svt_post_semaphore(pcs_ptr->tpl_disp_done_semaphore);
1934 
1935     }
1936         svt_release_object(in_results_wrapper_ptr);
1937 
1938     }
1939     return NULL;
1940 }
1941 
1942 
1943 
1944 
1945 /************************************************
1946  * Source Based Operations Kernel
1947  * Source-based operations process involves a number of analysis algorithms
1948  * to identify spatiotemporal characteristics of the input pictures.
1949  ************************************************/
source_based_operations_kernel(void * input_ptr)1950 void *source_based_operations_kernel(void *input_ptr) {
1951     EbThreadContext *             thread_context_ptr = (EbThreadContext *)input_ptr;
1952     SourceBasedOperationsContext *context_ptr        = (SourceBasedOperationsContext *)
1953                                                     thread_context_ptr->priv;
1954     PictureParentControlSet *  pcs_ptr;
1955     EbObjectWrapper *          in_results_wrapper_ptr;
1956     InitialRateControlResults *in_results_ptr;
1957     EbObjectWrapper *          out_results_wrapper_ptr;
1958 
1959     for (;;) {
1960         // Get Input Full Object
1961         EB_GET_FULL_OBJECT(context_ptr->initial_rate_control_results_input_fifo_ptr,
1962                            &in_results_wrapper_ptr);
1963 
1964         in_results_ptr = (InitialRateControlResults *)in_results_wrapper_ptr->object_ptr;
1965         pcs_ptr        = (PictureParentControlSet *)in_results_ptr->pcs_wrapper_ptr->object_ptr;
1966         context_ptr->complete_sb_count = 0;
1967         uint32_t sb_total_count        = pcs_ptr->sb_total_count;
1968         uint32_t sb_index;
1969 
1970         SequenceControlSet *scs_ptr = (SequenceControlSet *)pcs_ptr->scs_wrapper_ptr->object_ptr;
1971         // Get TPL ME
1972 
1973         if (scs_ptr->static_config.enable_tpl_la) {
1974 
1975             if (scs_ptr->static_config.enable_tpl_la &&
1976                 pcs_ptr->temporal_layer_index == 0) {
1977 
1978                 tpl_prep_info(pcs_ptr);
1979                 tpl_mc_flow(scs_ptr->encode_context_ptr, scs_ptr, pcs_ptr,context_ptr);
1980             }
1981         }
1982 
1983         /***********************************************SB-based operations************************************************************/
1984         for (sb_index = 0; sb_index < sb_total_count; ++sb_index) {
1985             SbParams *sb_params      = &pcs_ptr->sb_params_array[sb_index];
1986             EbBool    is_complete_sb = sb_params->is_complete_sb;
1987             if (is_complete_sb) {
1988                 context_ptr->complete_sb_count++;
1989             }
1990         }
1991         /*********************************************Picture-based operations**********************************************************/
1992 
1993         // Activity statistics derivation
1994         derive_picture_activity_statistics(pcs_ptr);
1995 
1996         // Get Empty Results Object
1997         svt_get_empty_object(context_ptr->picture_demux_results_output_fifo_ptr,
1998                              &out_results_wrapper_ptr);
1999 
2000         PictureDemuxResults *out_results_ptr = (PictureDemuxResults *)
2001                                                    out_results_wrapper_ptr->object_ptr;
2002         out_results_ptr->pcs_wrapper_ptr = in_results_ptr->pcs_wrapper_ptr;
2003         out_results_ptr->picture_type    = EB_PIC_INPUT;
2004 
2005         // Release the Input Results
2006         svt_release_object(in_results_wrapper_ptr);
2007 
2008         // Post the Full Results Object
2009         svt_post_full_object(out_results_wrapper_ptr);
2010     }
2011     return NULL;
2012 }
2013