1 /*
2 * Copyright(c) 2019 Intel Corporation
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at https://www.aomedia.org/license/software-license. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
10 */
11
12 #include "EbPictureControlSet.h"
13 #include "EbSequenceControlSet.h"
14
15 #include "EbSourceBasedOperationsProcess.h"
16 #include "EbInitialRateControlResults.h"
17 #include "EbPictureDemuxResults.h"
18 #ifdef ARCH_X86_64
19 #include <emmintrin.h>
20 #endif
21 #include "EbEncHandle.h"
22 #include "EbUtility.h"
23 #include "EbPictureManagerProcess.h"
24 #include "EbReferenceObject.h"
25 #include "EbTransforms.h"
26 #include "aom_dsp_rtcd.h"
27 #include "EbLog.h"
28 #include "EbIntraPrediction.h"
29 #include "EbMotionEstimation.h"
30 #include "EbEncDecResults.h"
31 #include "EbRateDistortionCost.h"
32
33 /**************************************
34 * Context
35 **************************************/
36
37 typedef struct SourceBasedOperationsContext {
38 EbDctor dctor;
39 EbFifo *initial_rate_control_results_input_fifo_ptr;
40 EbFifo *picture_demux_results_output_fifo_ptr;
41 EbFifo *sbo_output_fifo_ptr;
42 // local zz cost array
43 uint32_t complete_sb_count;
44 uint8_t *y_mean_ptr;
45 uint8_t *cr_mean_ptr;
46 uint8_t *cb_mean_ptr;
47 } SourceBasedOperationsContext;
48 typedef struct TplDispenserContext {
49 EbDctor dctor;
50 EbFifo *tpl_disp_input_fifo_ptr;
51 EbFifo *tpl_disp_fb_fifo_ptr;
52 uint32_t sb_index;
53 uint32_t coded_sb_count;
54 } TplDispenserContext;
55
/*
 * Destructor installed on the thread context by
 * source_based_operations_context_ctor(): frees the private context held in `priv`.
 */
static void source_based_operations_context_dctor(EbPtr p) {
    EbThreadContext              *owner = (EbThreadContext *)p;
    SourceBasedOperationsContext *ctx   = (SourceBasedOperationsContext *)owner->priv;

    EB_FREE_ARRAY(ctx);
}
61
62 /************************************************
63 * Source Based Operation Context Constructor
64 ************************************************/
source_based_operations_context_ctor(EbThreadContext * thread_context_ptr,const EbEncHandle * enc_handle_ptr,int index)65 EbErrorType source_based_operations_context_ctor(EbThreadContext * thread_context_ptr,
66 const EbEncHandle *enc_handle_ptr, int index) {
67 SourceBasedOperationsContext *context_ptr;
68 EB_CALLOC_ARRAY(context_ptr, 1);
69 thread_context_ptr->priv = context_ptr;
70 thread_context_ptr->dctor = source_based_operations_context_dctor;
71
72 context_ptr->initial_rate_control_results_input_fifo_ptr =
73 svt_system_resource_get_consumer_fifo(
74 enc_handle_ptr->initial_rate_control_results_resource_ptr, index);
75
76 context_ptr->sbo_output_fifo_ptr= svt_system_resource_get_producer_fifo(
77 enc_handle_ptr->tpl_disp_res_srm, index);
78 context_ptr->picture_demux_results_output_fifo_ptr = svt_system_resource_get_producer_fifo(
79 enc_handle_ptr->picture_demux_results_resource_ptr, index);
80 return EB_ErrorNone;
81 }
82
83 /***************************************************
84 * Derives BEA statistics and set activity flags
85 ***************************************************/
derive_picture_activity_statistics(PictureParentControlSet * pcs_ptr)86 void derive_picture_activity_statistics(PictureParentControlSet *pcs_ptr)
87
88 {
89 uint64_t non_moving_index_min = ~0u;
90 uint64_t non_moving_index_max = 0;
91 uint64_t non_moving_index_sum = 0;
92 uint32_t complete_sb_count = 0;
93 uint32_t non_moving_sb_count = 0;
94 uint32_t sb_total_count = pcs_ptr->sb_total_count;
95
96 for (uint32_t sb_index = 0; sb_index < sb_total_count; ++sb_index) {
97 SbParams *sb_params = &pcs_ptr->sb_params_array[sb_index];
98 if (sb_params->is_complete_sb) {
99 non_moving_index_min = pcs_ptr->non_moving_index_array[sb_index] < non_moving_index_min
100 ? pcs_ptr->non_moving_index_array[sb_index]
101 : non_moving_index_min;
102
103 non_moving_index_max = pcs_ptr->non_moving_index_array[sb_index] > non_moving_index_max
104 ? pcs_ptr->non_moving_index_array[sb_index]
105 : non_moving_index_max;
106 if (pcs_ptr->non_moving_index_array[sb_index] < NON_MOVING_SCORE_1)
107 non_moving_sb_count++;
108 complete_sb_count++;
109
110 non_moving_index_sum += pcs_ptr->non_moving_index_array[sb_index];
111 }
112 }
113
114 if (complete_sb_count > 0) {
115 pcs_ptr->non_moving_index_average = (uint16_t)(non_moving_index_sum / complete_sb_count);
116 pcs_ptr->kf_zeromotion_pct = (non_moving_sb_count * 100) / complete_sb_count;
117 }
118 pcs_ptr->non_moving_index_min_distance = (uint16_t)(
119 ABS((int32_t)(pcs_ptr->non_moving_index_average) - (int32_t)non_moving_index_min));
120 pcs_ptr->non_moving_index_max_distance = (uint16_t)(
121 ABS((int32_t)(pcs_ptr->non_moving_index_average) - (int32_t)non_moving_index_max));
122 return;
123 }
124
125 /*
126 TPL dispenser context dctor
127 */
tpl_disp_context_dctor(EbPtr p)128 static void tpl_disp_context_dctor(EbPtr p) {
129 EbThreadContext * thread_context_ptr = (EbThreadContext *)p;
130 TplDispenserContext *obj = (TplDispenserContext *)thread_context_ptr->priv;
131 EB_FREE_ARRAY(obj);
132 }
133 /*
134 TPL dispenser context cctor
135 */
tpl_disp_context_ctor(EbThreadContext * thread_context_ptr,const EbEncHandle * enc_handle_ptr,int index,int tasks_index)136 EbErrorType tpl_disp_context_ctor(EbThreadContext * thread_context_ptr,
137 const EbEncHandle *enc_handle_ptr, int index, int tasks_index) {
138 TplDispenserContext *context_ptr;
139 EB_CALLOC_ARRAY(context_ptr, 1);
140
141 thread_context_ptr->priv = context_ptr;
142 thread_context_ptr->dctor = tpl_disp_context_dctor;
143
144 context_ptr->tpl_disp_input_fifo_ptr = svt_system_resource_get_consumer_fifo(
145 enc_handle_ptr->tpl_disp_res_srm, index);
146
147 context_ptr->tpl_disp_fb_fifo_ptr = svt_system_resource_get_producer_fifo(
148 enc_handle_ptr->tpl_disp_res_srm, tasks_index);
149
150 return EB_ErrorNone;
151 }
152
153
154 void tpl_prep_info(PictureParentControlSet *pcs) ;
155
156
157 // Generate lambda factor to tune lambda based on TPL stats
generate_lambda_scaling_factor(PictureParentControlSet * pcs_ptr,int64_t mc_dep_cost_base)158 static void generate_lambda_scaling_factor(PictureParentControlSet *pcs_ptr,
159 int64_t mc_dep_cost_base) {
160 Av1Common *cm = pcs_ptr->av1_cm;
161 const int step = 1 << (pcs_ptr->is_720p_or_larger ? 2 : 1);
162 const int mi_cols_sr = ((pcs_ptr->aligned_width + 15) / 16) << 2;
163
164 const int block_size = BLOCK_16X16;
165 const int num_mi_w = mi_size_wide[block_size];
166 const int num_mi_h = mi_size_high[block_size];
167 const int num_cols = (mi_cols_sr + num_mi_w - 1) / num_mi_w;
168 const int num_rows = (cm->mi_rows + num_mi_h - 1) / num_mi_h;
169 const int stride = mi_cols_sr >> (1 + pcs_ptr->is_720p_or_larger);
170 const double c = 1.2;
171
172 for (int row = 0; row < num_rows; row++) {
173 for (int col = 0; col < num_cols; col++) {
174 double intra_cost = 0.0;
175 double mc_dep_cost = 0.0;
176 const int index = row * num_cols + col;
177 for (int mi_row = row * num_mi_h; mi_row < (row + 1) * num_mi_h; mi_row += step) {
178 for (int mi_col = col * num_mi_w; mi_col < (col + 1) * num_mi_w; mi_col += step) {
179 if (mi_row >= cm->mi_rows || mi_col >= mi_cols_sr)
180 continue;
181
182 const int index1 = (mi_row >> (1 + pcs_ptr->is_720p_or_larger)) * stride +
183 (mi_col >> (1 + pcs_ptr->is_720p_or_larger));
184 TplStats *tpl_stats_ptr = pcs_ptr->tpl_stats[index1];
185 int64_t mc_dep_delta = RDCOST(pcs_ptr->base_rdmult,
186 tpl_stats_ptr->mc_dep_rate,
187 tpl_stats_ptr->mc_dep_dist);
188 intra_cost += (double)(tpl_stats_ptr->recrf_dist << RDDIV_BITS);
189 mc_dep_cost += (double)(tpl_stats_ptr->recrf_dist << RDDIV_BITS) + mc_dep_delta;
190 }
191 }
192 double rk = 0;
193 if (mc_dep_cost > 0 && intra_cost > 0) {
194 rk = intra_cost / mc_dep_cost;
195 }
196
197 pcs_ptr->tpl_rdmult_scaling_factors[index] = (mc_dep_cost_base) ? rk / pcs_ptr->r0 + c
198 : c;
199 }
200 }
201
202 return;
203 }
204
get_quantize_error(MacroblockPlane * p,const TranLow * coeff,TranLow * qcoeff,TranLow * dqcoeff,TxSize tx_size,uint16_t * eob,int64_t * recon_error,int64_t * sse)205 static AOM_INLINE void get_quantize_error(MacroblockPlane *p, const TranLow *coeff, TranLow *qcoeff,
206 TranLow *dqcoeff, TxSize tx_size, uint16_t *eob,
207 int64_t *recon_error, int64_t *sse) {
208 const ScanOrder *const scan_order =
209 &av1_scan_orders[tx_size][DCT_DCT]; //&av1_default_scan_orders[tx_size]
210 int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
211 const int shift = tx_size == TX_32X32 ? 0 : 2;
212
213 svt_av1_quantize_fp(coeff,
214 pix_num,
215 p->zbin_qtx,
216 p->round_fp_qtx,
217 p->quant_fp_qtx,
218 p->quant_shift_qtx,
219 qcoeff,
220 dqcoeff,
221 p->dequant_qtx,
222 eob,
223 scan_order->scan,
224 scan_order->iscan);
225
226 *recon_error = svt_av1_block_error(coeff, dqcoeff, pix_num, sse) >> shift;
227 *recon_error = AOMMAX(*recon_error, 1);
228
229 *sse = (*sse) >> shift;
230 *sse = AOMMAX(*sse, 1);
231 }
232
/*
 * Rough rate estimate for a block of quantized coefficients.
 *
 * Walks the first `eob` positions of the DCT_DCT scan order for tx_size and
 * charges, per coefficient, (int)(log1p(|level|) / log(2)) + 1 units on top
 * of an initial cost of 1. The total is returned shifted left by
 * AV1_PROB_COST_SHIFT.
 */
static int rate_estimator(TranLow *qcoeff, int eob, TxSize tx_size) {
    const ScanOrder *const order = &av1_scan_orders[tx_size][DCT_DCT];

    assert((1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]) >= eob);

    int units = 1;
    int pos   = 0;
    while (pos < eob) {
        const int magnitude = abs(qcoeff[order->scan[pos]]);
        units += (int)(log1p(magnitude) / log(2.0)) + 1;
        ++pos;
    }

    return (units << AV1_PROB_COST_SHIFT);
}
248
249
250
251
/*
 * Spreads the statistics of one 16x16 block over the picture-level TPL
 * stats entries that the block covers.
 *
 * The four rate/distortion values of tpl_stats_ptr are divided by the
 * number of mode-info units in a 16x16 block and floored at 1; the mv and
 * ref_frame_poc are copied unchanged. Other members of the destination
 * entries are not touched.
 *
 * Destination entries are addressed with coordinates shifted right by
 * (3 + is_720p_or_larger) and visited every `step` mode-info units.
 * NOTE(review): entries within a row are reached by incrementing the
 * TplStats pointer, which assumes they are stored contiguously - confirm
 * against the tpl_stats allocation.
 */
static void result_model_store(PictureParentControlSet *pcs_ptr, TplStats *tpl_stats_ptr,
                               uint32_t mb_origin_x, uint32_t mb_origin_y) {
    const int mi_height       = mi_size_high[BLOCK_16X16];
    const int mi_width        = mi_size_wide[BLOCK_16X16];
    const int step            = 1 << (pcs_ptr->is_720p_or_larger ? 2 : 1);
    const int shift           = 3 + pcs_ptr->is_720p_or_larger;
    const int aligned16_width = ((pcs_ptr->aligned_width + 15) / 16) << 4;
    const int mi_area         = mi_height * mi_width;

    int64_t srcrf_dist = tpl_stats_ptr->srcrf_dist / mi_area;
    if (srcrf_dist < 1)
        srcrf_dist = 1;

    int64_t recrf_dist = tpl_stats_ptr->recrf_dist / mi_area;
    if (recrf_dist < 1)
        recrf_dist = 1;

    int64_t srcrf_rate = tpl_stats_ptr->srcrf_rate / mi_area;
    if (srcrf_rate < 1)
        srcrf_rate = 1;

    int64_t recrf_rate = tpl_stats_ptr->recrf_rate / mi_area;
    if (recrf_rate < 1)
        recrf_rate = 1;

    const uint32_t first_row  = mb_origin_y >> shift;
    const uint32_t first_col  = mb_origin_x >> shift;
    const int      row_stride = aligned16_width >> shift;

    for (int idy = 0; idy < mi_height; idy += step) {
        TplStats *dst = pcs_ptr->tpl_stats[(first_row + (idy >> 1)) * row_stride + first_col];

        for (int idx = 0; idx < mi_width; idx += step, ++dst) {
            dst->srcrf_dist    = srcrf_dist;
            dst->recrf_dist    = recrf_dist;
            dst->srcrf_rate    = srcrf_rate;
            dst->recrf_rate    = recrf_rate;
            dst->mv            = tpl_stats_ptr->mv;
            dst->ref_frame_poc = tpl_stats_ptr->ref_frame_poc;
        }
    }
}
285
286
287 static const int16_t dc_qlookup_QTX[QINDEX_RANGE] = {
288 4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18, 19, 19,
289 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32, 32, 33,
290 34, 35, 36, 37, 38, 38, 39, 40, 41, 42, 43, 43, 44, 45, 46, 47,
291 48, 48, 49, 50, 51, 52, 53, 53, 54, 55, 56, 57, 57, 58, 59, 60,
292 61, 62, 62, 63, 64, 65, 66, 66, 67, 68, 69, 70, 70, 71, 72, 73,
293 74, 74, 75, 76, 77, 78, 78, 79, 80, 81, 81, 82, 83, 84, 85, 85,
294 87, 88, 90, 92, 93, 95, 96, 98, 99, 101, 102, 104, 105, 107, 108, 110,
295 111, 113, 114, 116, 117, 118, 120, 121, 123, 125, 127, 129, 131, 134, 136, 138,
296 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 161, 164, 166, 169, 172, 174,
297 177, 180, 182, 185, 187, 190, 192, 195, 199, 202, 205, 208, 211, 214, 217, 220,
298 223, 226, 230, 233, 237, 240, 243, 247, 250, 253, 257, 261, 265, 269, 272, 276,
299 280, 284, 288, 292, 296, 300, 304, 309, 313, 317, 322, 326, 330, 335, 340, 344,
300 349, 354, 359, 364, 369, 374, 379, 384, 389, 395, 400, 406, 411, 417, 423, 429,
301 435, 441, 447, 454, 461, 467, 475, 482, 489, 497, 505, 513, 522, 530, 539, 549,
302 559, 569, 579, 590, 602, 614, 626, 640, 654, 668, 684, 700, 717, 736, 755, 775,
303 796, 819, 843, 869, 896, 925, 955, 988, 1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336,
304 };
305
306 static const int16_t dc_qlookup_10_QTX[QINDEX_RANGE] = {
307 4, 9, 10, 13, 15, 17, 20, 22, 25, 28, 31, 34, 37, 40, 43, 47,
308 50, 53, 57, 60, 64, 68, 71, 75, 78, 82, 86, 90, 93, 97, 101, 105,
309 109, 113, 116, 120, 124, 128, 132, 136, 140, 143, 147, 151, 155, 159, 163, 166,
310 170, 174, 178, 182, 185, 189, 193, 197, 200, 204, 208, 212, 215, 219, 223, 226,
311 230, 233, 237, 241, 244, 248, 251, 255, 259, 262, 266, 269, 273, 276, 280, 283,
312 287, 290, 293, 297, 300, 304, 307, 310, 314, 317, 321, 324, 327, 331, 334, 337,
313 343, 350, 356, 362, 369, 375, 381, 387, 394, 400, 406, 412, 418, 424, 430, 436,
314 442, 448, 454, 460, 466, 472, 478, 484, 490, 499, 507, 516, 525, 533, 542, 550,
315 559, 567, 576, 584, 592, 601, 609, 617, 625, 634, 644, 655, 666, 676, 687, 698,
316 708, 718, 729, 739, 749, 759, 770, 782, 795, 807, 819, 831, 844, 856, 868, 880,
317 891, 906, 920, 933, 947, 961, 975, 988, 1001, 1015, 1030, 1045, 1061, 1076, 1090, 1105,
318 1120, 1137, 1153, 1170, 1186, 1202, 1218, 1236, 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379,
319 1398, 1416, 1436, 1456, 1476, 1496, 1516, 1537, 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717,
320 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, 1958, 1990, 2021, 2054, 2088, 2123, 2159, 2197,
321 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, 2616, 2675, 2737, 2802, 2871, 2944, 3020, 3102,
322 3188, 3280, 3375, 3478, 3586, 3702, 3823, 3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347,
323 };
324
325 static const int16_t dc_qlookup_12_QTX[QINDEX_RANGE] = {
326 4, 12, 18, 25, 33, 41, 50, 60, 70, 80, 91, 103, 115,
327 127, 140, 153, 166, 180, 194, 208, 222, 237, 251, 266, 281, 296,
328 312, 327, 343, 358, 374, 390, 405, 421, 437, 453, 469, 484, 500,
329 516, 532, 548, 564, 580, 596, 611, 627, 643, 659, 674, 690, 706,
330 721, 737, 752, 768, 783, 798, 814, 829, 844, 859, 874, 889, 904,
331 919, 934, 949, 964, 978, 993, 1008, 1022, 1037, 1051, 1065, 1080, 1094,
332 1108, 1122, 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234, 1248, 1261, 1275,
333 1288, 1302, 1315, 1329, 1342, 1368, 1393, 1419, 1444, 1469, 1494, 1519, 1544,
334 1569, 1594, 1618, 1643, 1668, 1692, 1717, 1741, 1765, 1789, 1814, 1838, 1862,
335 1885, 1909, 1933, 1957, 1992, 2027, 2061, 2096, 2130, 2165, 2199, 2233, 2267,
336 2300, 2334, 2367, 2400, 2434, 2467, 2499, 2532, 2575, 2618, 2661, 2704, 2746,
337 2788, 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, 3177, 3226, 3275, 3324,
338 3373, 3421, 3469, 3517, 3565, 3621, 3677, 3733, 3788, 3843, 3897, 3951, 4005,
339 4058, 4119, 4181, 4241, 4301, 4361, 4420, 4479, 4546, 4612, 4677, 4742, 4807,
340 4871, 4942, 5013, 5083, 5153, 5222, 5291, 5367, 5442, 5517, 5591, 5665, 5745,
341 5825, 5905, 5984, 6063, 6149, 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867,
342 6966, 7064, 7163, 7269, 7376, 7483, 7599, 7715, 7832, 7958, 8085, 8214, 8352,
343 8492, 8635, 8788, 8945, 9104, 9275, 9450, 9639, 9832, 10031, 10245, 10465, 10702,
344 10946, 11210, 11482, 11776, 12081, 12409, 12750, 13118, 13501, 13913, 14343, 14807, 15290,
345 15812, 16356, 16943, 17575, 18237, 18949, 19718, 20521, 21387,
346 };
347
/*
 * Returns the DC quantizer step for (qindex + delta), clamped to [0, MAXQ],
 * from the lookup table that matches bit_depth.
 *
 * For a bit depth other than AOM_BITS_8/10/12 the function asserts and,
 * when assertions are compiled out, returns -1.
 */
int16_t av1_dc_quant_qtx(int qindex, int delta, AomBitDepth bit_depth) {
    const int      q_clamped = clamp(qindex + delta, 0, MAXQ);
    const int16_t *table;

    if (bit_depth == AOM_BITS_8) {
        table = dc_qlookup_QTX;
    } else if (bit_depth == AOM_BITS_10) {
        table = dc_qlookup_10_QTX;
    } else if (bit_depth == AOM_BITS_12) {
        table = dc_qlookup_12_QTX;
    } else {
        assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
        return -1;
    }

    return table[q_clamped];
}
357
/*
 * Derives the RD multiplier from the DC quantizer step q of `qindex`:
 *   rdmult = 3 * q^2 + (2 * q^2) / 3
 * rounded down by 4 bits for 10-bit and by 8 bits for 12-bit content.
 * The result is never less than 1.
 *
 * For a bit depth other than AOM_BITS_8/10/12 the function asserts and,
 * when assertions are compiled out, returns -1.
 */
int svt_av1_compute_rd_mult_based_on_qindex(AomBitDepth bit_depth, int qindex) {
    const int q         = av1_dc_quant_qtx(qindex, 0, bit_depth);
    const int q_squared = q * q;
    int       rdmult    = q_squared * 3 + (q_squared * 2 / 3);

    if (bit_depth == AOM_BITS_10) {
        rdmult = ROUND_POWER_OF_TWO(rdmult, 4);
    } else if (bit_depth == AOM_BITS_12) {
        rdmult = ROUND_POWER_OF_TWO(rdmult, 8);
    } else if (bit_depth != AOM_BITS_8) {
        assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12");
        return -1;
    }

    if (rdmult > 0)
        return rdmult;
    return 1;
}
371
372 void svt_av1_build_quantizer(AomBitDepth bit_depth, int32_t y_dc_delta_q, int32_t u_dc_delta_q,
373 int32_t u_ac_delta_q, int32_t v_dc_delta_q, int32_t v_ac_delta_q,
374 Quants *const quants, Dequants *const deq);
375
376 double svt_av1_convert_qindex_to_q(int32_t qindex, AomBitDepth bit_depth);
377
378 int32_t svt_av1_compute_qdelta(double qstart, double qtarget, AomBitDepth bit_depth);
379
380 extern void filter_intra_edge(OisMbResults *ois_mb_results_ptr, uint8_t mode,
381 uint16_t max_frame_width, uint16_t max_frame_height, int32_t p_angle,
382 int32_t cu_origin_x, int32_t cu_origin_y, uint8_t *above_row,
383 uint8_t *left_col);
384
385 //Given one reference frame identified by the pair (list_index,ref_index)
386 //indicate if ME data is valid
is_me_data_valid(const MeSbResults * me_results,uint32_t me_mb_offset,uint8_t list_idx,uint8_t ref_idx)387 static uint8_t is_me_data_valid(const MeSbResults *me_results, uint32_t me_mb_offset,
388 uint8_t list_idx, uint8_t ref_idx) {
389 uint8_t total_me_cnt = me_results->total_me_candidate_index[me_mb_offset];
390 const MeCandidate *me_block_results =
391 &me_results->me_candidate_array[me_mb_offset * MAX_PA_ME_CAND];
392
393 for (uint32_t me_cand_i = 0; me_cand_i < total_me_cnt; ++me_cand_i) {
394 const MeCandidate *me_cand = &me_block_results[me_cand_i];
395 assert(/*me_cand->direction >= 0 && */ me_cand->direction <= 2);
396 if (me_cand->direction == 0 || me_cand->direction == 2) {
397 if (list_idx == me_cand->ref0_list && ref_idx == me_cand->ref_idx_l0)
398 return 1;
399 }
400 if (me_cand->direction == 1 || me_cand->direction == 2) {
401 if (list_idx == me_cand->ref1_list && ref_idx == me_cand->ref_idx_l1)
402 return 1;
403 }
404 }
405 return 0;
406 }
407
408
/*
 * Clamps a motion vector so that the 16x16 block it points to stays inside
 * the reference picture extended by TPL_PADX / TPL_PADY samples on each side.
 *
 *   ref_pic_ptr   reference picture; only max_width / max_height are read
 *   mb_origin_x   block origin in the picture, in samples
 *   mb_origin_y
 *   x_curr_mv     in/out: horizontal MV component
 *   y_curr_mv     in/out: vertical MV component
 *
 * The MV components are converted to full-sample offsets with `>> 3`, and a
 * clamped offset is converted back by scaling with 8.
 *
 * The scaling is written as a multiplication rather than `<< 3`: the
 * low-side limits (-pad - origin) are negative, and left-shifting a negative
 * signed value is undefined behaviour in C. The multiplication yields the
 * value the shift was meant to produce.
 */
void clip_mv_in_pad(EbPictureBufferDesc *ref_pic_ptr, uint32_t mb_origin_x, uint32_t mb_origin_y,
                    int16_t *x_curr_mv, int16_t *y_curr_mv) {
    // Search area adjustment
    int16_t blk_origin_x = mb_origin_x;
    int16_t blk_origin_y = mb_origin_y;
    int16_t bwidth       = 16;
    int16_t bheight      = 16;
    int16_t mvx          = *x_curr_mv;
    int16_t mvy          = *y_curr_mv;
    int16_t padx         = TPL_PADX;
    int16_t pady         = TPL_PADY;

    // left edge of the padded reference
    if ((blk_origin_x + (mvx >> 3)) < -padx)
        mvx = (int16_t)((-padx - blk_origin_x) * 8);

    // right edge of the padded reference
    if ((blk_origin_x + bwidth + (mvx >> 3)) > (padx + ref_pic_ptr->max_width - 1))
        mvx = (int16_t)(((padx + ref_pic_ptr->max_width - 1) - (blk_origin_x + bwidth)) * 8);

    // top edge of the padded reference
    if ((blk_origin_y + (mvy >> 3)) < -pady)
        mvy = (int16_t)((-pady - blk_origin_y) * 8);

    // bottom edge of the padded reference
    if ((blk_origin_y + bheight + (mvy >> 3)) > (pady + ref_pic_ptr->max_height - 1))
        mvy = (int16_t)(((pady + ref_pic_ptr->max_height - 1) - (blk_origin_y + bheight)) * 8);

    *x_curr_mv = mvx;
    *y_curr_mv = mvy;
}
441 // Reference pruning, Loop over all available references and get the best reference idx based on SAD
get_best_reference(PictureParentControlSet * pcs_ptr,uint32_t sb_index,uint32_t me_mb_offset,uint32_t mb_origin_x,uint32_t mb_origin_y,uint32_t * best_reference)442 void get_best_reference(
443 PictureParentControlSet *pcs_ptr,
444 uint32_t sb_index,
445 uint32_t me_mb_offset,
446 uint32_t mb_origin_x,
447 uint32_t mb_origin_y,
448 uint32_t *best_reference )
449 {
450 EbPictureBufferDesc *input_ptr = pcs_ptr->enhanced_picture_ptr;
451 uint32_t max_inter_ref = MAX_PA_ME_MV;
452 EbPictureBufferDesc *ref_pic_ptr;
453 int16_t x_curr_mv = 0;
454 int16_t y_curr_mv = 0;
455 uint32_t best_reference_sad = UINT32_MAX;
456 uint32_t reference_sad;
457 uint8_t * src_mb = input_ptr->buffer_y + input_ptr->origin_x + mb_origin_x +
458 (input_ptr->origin_y + mb_origin_y) * input_ptr->stride_y;
459
460 for (uint32_t rf_idx = 0; rf_idx < max_inter_ref; rf_idx++) {
461 uint32_t list_index = rf_idx < 4 ? 0 : 1;
462 uint32_t ref_pic_index = rf_idx >= 4 ? (rf_idx - 4) : rf_idx;
463 if ((list_index == 0 && (ref_pic_index + 1) > pcs_ptr->tpl_data.tpl_ref0_count) ||
464 (list_index == 1 && (ref_pic_index + 1) > pcs_ptr->tpl_data.tpl_ref1_count))
465 continue;
466 if (!is_me_data_valid(
467 pcs_ptr->pa_me_data->me_results[sb_index], me_mb_offset, list_index, ref_pic_index))
468 continue;
469 ref_pic_ptr = (EbPictureBufferDesc *)pcs_ptr->tpl_data
470 .tpl_ref_ds_ptr_array[list_index][ref_pic_index]
471 .picture_ptr;
472
473 const MeSbResults *me_results = pcs_ptr->pa_me_data->me_results[sb_index];
474 x_curr_mv =
475 me_results
476 ->me_mv_array[me_mb_offset * MAX_PA_ME_MV + (list_index ? 4 : 0) + ref_pic_index]
477 .x_mv
478 << 1;
479 y_curr_mv =
480 me_results
481 ->me_mv_array[me_mb_offset * MAX_PA_ME_MV + (list_index ? 4 : 0) + ref_pic_index]
482 .y_mv
483 << 1;
484 clip_mv_in_pad(ref_pic_ptr,mb_origin_x,mb_origin_y,&x_curr_mv,&y_curr_mv);
485 MV best_mv = {y_curr_mv, x_curr_mv};
486 int32_t ref_origin_index = ref_pic_ptr->origin_x + (mb_origin_x + (best_mv.col >> 3)) +
487 (mb_origin_y + (best_mv.row >> 3) + ref_pic_ptr->origin_y) * ref_pic_ptr->stride_y;
488 reference_sad = svt_nxm_sad_kernel_sub_sampled(src_mb,
489 input_ptr->stride_y,
490 ref_pic_ptr->buffer_y + ref_origin_index,
491 ref_pic_ptr->stride_y,
492 16,
493 16);
494 if (reference_sad < best_reference_sad) {
495 best_reference_sad = reference_sad;
496 *best_reference = rf_idx;
497 }
498 }
499 return;
500 }
501
502
503
504
505
506 /*
507 TPL Dispenser SB based (sz 64x64)
508 */
tpl_mc_flow_dispenser_sb(EncodeContext * encode_context_ptr,SequenceControlSet * scs_ptr,PictureParentControlSet * pcs_ptr,int32_t frame_idx,uint32_t sb_index,int32_t qIndex)509 void tpl_mc_flow_dispenser_sb(
510 EncodeContext *encode_context_ptr,
511 SequenceControlSet *scs_ptr,
512 PictureParentControlSet *pcs_ptr,
513 int32_t frame_idx,
514 uint32_t sb_index,
515 int32_t qIndex)
516 {
517 {
518 uint32_t picture_width_in_mb = (pcs_ptr->enhanced_picture_ptr->width + 16 - 1) / 16;
519 int16_t x_curr_mv = 0;
520 int16_t y_curr_mv = 0;
521 uint32_t me_mb_offset = 0;
522 TxSize tx_size = TX_16X16;
523 EbPictureBufferDesc *ref_pic_ptr;
524 BlockGeom blk_geom;
525 EbPictureBufferDesc *input_picture_ptr = pcs_ptr->enhanced_picture_ptr;
526 EbPictureBufferDesc *recon_picture_ptr =
527 encode_context_ptr->mc_flow_rec_picture_buffer[frame_idx];
528 TplStats tpl_stats;
529
530 DECLARE_ALIGNED(32, uint8_t, predictor8[256 * 2]);
531 DECLARE_ALIGNED(32, int16_t, src_diff[256]);
532 DECLARE_ALIGNED(32, TranLow, coeff[256]);
533 DECLARE_ALIGNED(32, TranLow, qcoeff[256]);
534 DECLARE_ALIGNED(32, TranLow, dqcoeff[256]);
535 DECLARE_ALIGNED(32, TranLow, best_coeff[256]);
536 uint8_t *predictor = predictor8;
537
538 blk_geom.bwidth = 16;
539 blk_geom.bheight = 16;
540
541 MacroblockPlane mb_plane;
542 mb_plane.quant_qtx = scs_ptr->quants_8bit.y_quant[qIndex];
543 mb_plane.quant_fp_qtx = scs_ptr->quants_8bit.y_quant_fp[qIndex];
544 mb_plane.round_fp_qtx = scs_ptr->quants_8bit.y_round_fp[qIndex];
545 mb_plane.quant_shift_qtx = scs_ptr->quants_8bit.y_quant_shift[qIndex];
546 mb_plane.zbin_qtx = scs_ptr->quants_8bit.y_zbin[qIndex];
547 mb_plane.round_qtx = scs_ptr->quants_8bit.y_round[qIndex];
548 mb_plane.dequant_qtx = scs_ptr->deq_8bit.y_dequant_qtx[qIndex];
549
550 EbPictureBufferDesc *input_ptr = pcs_ptr->enhanced_picture_ptr;
551 const uint8_t tpl_opt_flag = pcs_ptr->tpl_ctrls.tpl_opt_flag;
552
553
554 SbParams *sb_params = &scs_ptr->sb_params_array[sb_index];
555 uint32_t pa_blk_index = 0;
556 while (pa_blk_index < CU_MAX_COUNT) {
557 const CodedBlockStats *blk_stats_ptr;
558 blk_stats_ptr = get_coded_blk_stats(pa_blk_index);
559 uint8_t bsize = blk_stats_ptr->size;
560 EbBool small_boundary_blk = EB_FALSE;
561
562 {
563 uint32_t cu_origin_x = sb_params->origin_x + blk_stats_ptr->origin_x;
564 uint32_t cu_origin_y = sb_params->origin_y + blk_stats_ptr->origin_y;
565 if ((blk_stats_ptr->origin_x % 16) == 0 &&
566 (blk_stats_ptr->origin_y % 16) == 0 &&
567 ((pcs_ptr->enhanced_picture_ptr->width - cu_origin_x) < 16 ||
568 (pcs_ptr->enhanced_picture_ptr->height - cu_origin_y) < 16))
569 small_boundary_blk = EB_TRUE;
570 }
571 if (bsize != 16 && !small_boundary_blk) {
572 pa_blk_index++;
573 continue;
574 }
575 if (sb_params->raster_scan_blk_validity[md_scan_to_raster_scan[pa_blk_index]]) {
576 uint32_t mb_origin_x = sb_params->origin_x + blk_stats_ptr->origin_x;
577 uint32_t mb_origin_y = sb_params->origin_y + blk_stats_ptr->origin_y;
578 const int dst_buffer_stride = recon_picture_ptr->stride_y;
579 const int dst_mb_offset = mb_origin_y * dst_buffer_stride + mb_origin_x;
580 const int dst_basic_offset = recon_picture_ptr->origin_y *
581 recon_picture_ptr->stride_y +
582 recon_picture_ptr->origin_x;
583 uint8_t *dst_buffer = recon_picture_ptr->buffer_y + dst_basic_offset +
584 dst_mb_offset;
585
586 int64_t inter_cost;
587 int64_t recon_error = 1, sse = 1;
588 uint64_t best_ref_poc = 0;
589 int32_t best_rf_idx = -1;
590 int64_t best_inter_cost = INT64_MAX;
591 MV final_best_mv = {0, 0};
592 uint32_t max_inter_ref = MAX_PA_ME_MV;
593
594 PredictionMode best_intra_mode = DC_PRED;
595 int64_t best_intra_cost = INT64_MAX;
596 // Disable intra prediction
597 uint8_t disable_intra_pred = tpl_opt_flag && (pcs_ptr->tpl_ctrls.disable_intra_pred_nref ||
598 pcs_ptr->tpl_ctrls.disable_intra_pred_nbase);
599 if (!disable_intra_pred ||
600 (pcs_ptr->tpl_ctrls.disable_intra_pred_nref && pcs_ptr->tpl_data.is_used_as_reference_flag) ||
601 (pcs_ptr->tpl_ctrls.disable_intra_pred_nbase && pcs_ptr->tpl_data.tpl_temporal_layer_index == 0)){
602 if (scs_ptr->in_loop_ois == 0) {
603 OisMbResults *ois_mb_results_ptr =
604 pcs_ptr->ois_mb_results[(mb_origin_y >> 4) * picture_width_in_mb +
605 (mb_origin_x >> 4)];
606 best_intra_mode = ois_mb_results_ptr->intra_mode;
607 best_intra_cost = ois_mb_results_ptr->intra_cost;
608
609 } else { // ois
610 // always process as block16x16 even bsize or tx_size is 8x8
611 bsize = 16;
612 DECLARE_ALIGNED(16, uint8_t, left0_data[MAX_TX_SIZE * 2 + 32]);
613 DECLARE_ALIGNED(16, uint8_t, above0_data[MAX_TX_SIZE * 2 + 32]);
614 DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 32]);
615 DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 32]);
616
617 uint8_t *above_row;
618 uint8_t *left_col;
619 uint8_t *above0_row;
620 uint8_t *left0_col;
621 above0_row = above0_data + 16;
622 left0_col = left0_data + 16;
623 above_row = above_data + 16;
624 left_col = left_data + 16;
625
626 uint8_t *src = input_ptr->buffer_y +
627 pcs_ptr->enhanced_picture_ptr->origin_x + mb_origin_x +
628 (pcs_ptr->enhanced_picture_ptr->origin_y + mb_origin_y) *
629 input_ptr->stride_y;
630
631 // Fill Neighbor Arrays
632 update_neighbor_samples_array_open_loop_mb(
633 1, // use_top_righ_bottom_left
634 1, // update_top_neighbor
635 above0_row - 1,
636 left0_col - 1,
637 input_ptr,
638 input_ptr->stride_y,
639 mb_origin_x,
640 mb_origin_y,
641 bsize,
642 bsize);
643
644 uint8_t ois_intra_mode;
645 uint8_t intra_mode_start = DC_PRED;
646 EbBool enable_paeth = pcs_ptr->scs_ptr->static_config.enable_paeth ==
647 DEFAULT
648 ? EB_TRUE
649 : (EbBool)pcs_ptr->scs_ptr->static_config.enable_paeth;
650 EbBool enable_smooth = pcs_ptr->scs_ptr->static_config.enable_smooth ==
651 DEFAULT
652 ? EB_TRUE
653 : (EbBool)pcs_ptr->scs_ptr->static_config.enable_smooth;
654 uint8_t intra_mode_end =
655 pcs_ptr->tpl_ctrls.tpl_opt_flag
656
657 ? DC_PRED
658 : enable_paeth ? PAETH_PRED
659 : enable_smooth ? SMOOTH_H_PRED
660 : D67_PRED;
661
662 for (ois_intra_mode = intra_mode_start;
663 ois_intra_mode <= intra_mode_end;
664 ++ois_intra_mode) {
665 int32_t p_angle = av1_is_directional_mode(
666 (PredictionMode)ois_intra_mode)
667 ? mode_to_angle_map[(PredictionMode)ois_intra_mode]
668 : 0;
669 // Edge filter
670 if (av1_is_directional_mode((PredictionMode)ois_intra_mode) &&
671 1 /*scs_ptr->seq_header.enable_intra_edge_filter*/) {
672 EB_MEMCPY(left_data,
673 left0_data,
674 sizeof(uint8_t) * (MAX_TX_SIZE * 2 + 32));
675 EB_MEMCPY(above_data,
676 above0_data,
677 sizeof(uint8_t) * (MAX_TX_SIZE * 2 + 32));
678 above_row = above_data + 16;
679 left_col = left_data + 16;
680 filter_intra_edge(NULL,
681 ois_intra_mode,
682 scs_ptr->seq_header.max_frame_width,
683 scs_ptr->seq_header.max_frame_height,
684 p_angle,
685 (int32_t)mb_origin_x,
686 (int32_t)mb_origin_y,
687 above_row,
688 left_col);
689 } else {
690 above_row = above0_row;
691 left_col = left0_col;
692 }
693 // PRED
694 intra_prediction_open_loop_mb(p_angle,
695 ois_intra_mode,
696 mb_origin_x,
697 mb_origin_y,
698 tx_size,
699 above_row,
700 left_col,
701 predictor,
702 16);
703
704 // Distortion
705 int64_t intra_cost;
706 if (pcs_ptr->tpl_ctrls.tpl_opt_flag && pcs_ptr->tpl_ctrls.use_pred_sad_in_intra_search) {
707 intra_cost = svt_nxm_sad_kernel_sub_sampled(
708 src,
709 input_ptr->stride_y,
710 predictor,
711 16,
712 16,
713 16);
714 }
715 else {
716 svt_aom_subtract_block(
717 16, 16, src_diff, 16, src, input_ptr->stride_y, predictor, 16);
718 EB_TRANS_COEFF_SHAPE pf_shape = pcs_ptr->tpl_ctrls.tpl_opt_flag ? pcs_ptr->tpl_ctrls.pf_shape : DEFAULT_SHAPE;
719 svt_av1_wht_fwd_txfm(src_diff, 16, coeff, tx_size, pf_shape, 8, 0);
720 intra_cost = svt_aom_satd(coeff, 16 * 16);
721 }
722
723 if (intra_cost < best_intra_cost) {
724 best_intra_cost = intra_cost;
725 best_intra_mode = ois_intra_mode;
726 }
727 }
728 }
729 }
730 uint8_t best_mode = DC_PRED;
731 uint8_t *src_mb = input_picture_ptr->buffer_y + input_picture_ptr->origin_x +
732 mb_origin_x +
733 (input_picture_ptr->origin_y + mb_origin_y) * input_picture_ptr->stride_y;
734 memset(&tpl_stats, 0, sizeof(tpl_stats));
735 blk_geom.origin_x = blk_stats_ptr->origin_x;
736 blk_geom.origin_y = blk_stats_ptr->origin_y;
737 me_mb_offset = get_me_info_index(
738 pcs_ptr->max_number_of_pus_per_sb, &blk_geom, 0, 0);
739
740 uint32_t best_reference = 0;
741 if (pcs_ptr->tpl_ctrls.tpl_opt_flag && pcs_ptr->tpl_ctrls.get_best_ref)
742 // Reference pruning
743 get_best_reference(pcs_ptr,
744 sb_index,
745 me_mb_offset,
746 mb_origin_x,
747 mb_origin_y,
748 &best_reference);
749
750 for (uint32_t rf_idx = 0; rf_idx < max_inter_ref; rf_idx++) {
751 if (pcs_ptr->tpl_ctrls.get_best_ref)
752 if (rf_idx != best_reference)
753 continue;
754 uint32_t list_index = rf_idx < 4 ? 0 : 1;
755 uint32_t ref_pic_index = rf_idx >= 4 ? (rf_idx - 4) : rf_idx;
756 if ((list_index == 0 &&
757 (ref_pic_index + 1) > pcs_ptr->tpl_data.tpl_ref0_count) ||
758 (list_index == 1 &&
759 (ref_pic_index + 1) > pcs_ptr->tpl_data.tpl_ref1_count))
760 continue;
761 if (!is_me_data_valid(pcs_ptr->pa_me_data->me_results[sb_index],
762 me_mb_offset,
763 list_index,
764 ref_pic_index))
765 continue;
766 ref_pic_ptr = (EbPictureBufferDesc *)pcs_ptr->tpl_data
767 .tpl_ref_ds_ptr_array[list_index][ref_pic_index]
768 .picture_ptr;
769 const MeSbResults *me_results = pcs_ptr->pa_me_data->me_results[sb_index];
770 x_curr_mv = me_results
771 ->me_mv_array[me_mb_offset * MAX_PA_ME_MV +
772 (list_index ? 4 : 0) + ref_pic_index]
773 .x_mv
774 << 1;
775 y_curr_mv = me_results
776 ->me_mv_array[me_mb_offset * MAX_PA_ME_MV +
777 (list_index ? 4 : 0) + ref_pic_index]
778 .y_mv
779 << 1;
780 clip_mv_in_pad(ref_pic_ptr,mb_origin_x,mb_origin_y,&x_curr_mv,&y_curr_mv);
781 MV best_mv = {y_curr_mv, x_curr_mv};
782 if (pcs_ptr->tpl_ctrls.tpl_opt_flag && pcs_ptr->tpl_ctrls.use_pred_sad_in_inter_search) {
783 int32_t ref_origin_index = ref_pic_ptr->origin_x +
784 (mb_origin_x + (best_mv.col >> 3)) +
785 (mb_origin_y + (best_mv.row >> 3) +
786 ref_pic_ptr->origin_y) * ref_pic_ptr->stride_y;
787 //sad_1
788 inter_cost = svt_nxm_sad_kernel_sub_sampled(
789 src_mb,
790 input_ptr->stride_y,
791 ref_pic_ptr->buffer_y + ref_origin_index,
792 ref_pic_ptr->stride_y,
793 16,
794 16);
795 }
796 else {
797 int32_t ref_origin_index = ref_pic_ptr->origin_x +
798 (mb_origin_x + (best_mv.col >> 3)) +
799 (mb_origin_y + (best_mv.row >> 3) + ref_pic_ptr->origin_y) *
800 ref_pic_ptr->stride_y;
801
802 svt_aom_subtract_block(16,
803 16,
804 src_diff,
805 16,
806 src_mb,
807 input_picture_ptr->stride_y,
808 ref_pic_ptr->buffer_y + ref_origin_index,
809 ref_pic_ptr->stride_y);
810 EB_TRANS_COEFF_SHAPE pf_shape = pcs_ptr->tpl_ctrls.tpl_opt_flag ? pcs_ptr->tpl_ctrls.pf_shape : DEFAULT_SHAPE;
811 svt_av1_wht_fwd_txfm(src_diff, 16, coeff, tx_size, pf_shape, 8, 0);
812
813 inter_cost = svt_aom_satd(coeff, 256);
814 }
815 if (inter_cost < best_inter_cost) {
816 if (!(pcs_ptr->tpl_ctrls.tpl_opt_flag && pcs_ptr->tpl_ctrls.use_pred_sad_in_inter_search))
817 EB_MEMCPY(best_coeff, coeff, sizeof(best_coeff));
818 best_ref_poc = pcs_ptr->tpl_data
819 .tpl_ref_ds_ptr_array[list_index][ref_pic_index]
820 .picture_number;
821
822 best_rf_idx = rf_idx;
823 best_inter_cost = inter_cost;
824 final_best_mv = best_mv;
825
826 if (best_inter_cost < best_intra_cost)
827 best_mode = NEWMV;
828 }
829 } // rf_idx
830
831 if (best_mode == NEWMV) {
832 uint16_t eob = 0;
833 if (pcs_ptr->tpl_ctrls.tpl_opt_flag && pcs_ptr->tpl_ctrls.use_pred_sad_in_inter_search) {
834 uint32_t list_index = best_rf_idx < 4 ? 0 : 1;
835 uint32_t ref_pic_index = best_rf_idx >= 4 ? (best_rf_idx - 4) : best_rf_idx;
836
837 ref_pic_ptr = (EbPictureBufferDesc*)pcs_ptr->tpl_data.tpl_ref_ds_ptr_array[list_index][ref_pic_index].picture_ptr;
838
839 int32_t ref_origin_index = ref_pic_ptr->origin_x +
840 (mb_origin_x + (final_best_mv.col >> 3)) +
841 (mb_origin_y + (final_best_mv.row >> 3) +
842 ref_pic_ptr->origin_y) * ref_pic_ptr->stride_y;
843 svt_aom_subtract_block(16, 16, src_diff, 16, src_mb, input_picture_ptr->stride_y,
844 ref_pic_ptr->buffer_y + ref_origin_index, ref_pic_ptr->stride_y);
845 EB_TRANS_COEFF_SHAPE pf_shape = pcs_ptr->tpl_ctrls.tpl_opt_flag ? pcs_ptr->tpl_ctrls.pf_shape : DEFAULT_SHAPE;
846 svt_av1_wht_fwd_txfm(src_diff, 16, coeff, tx_size, pf_shape, 8, 0);
847 memcpy(best_coeff, coeff, sizeof(best_coeff));
848 }
849 get_quantize_error(&mb_plane,
850 best_coeff,
851 qcoeff,
852 dqcoeff,
853 tx_size,
854 &eob,
855 &recon_error,
856 &sse);
857 int rate_cost = pcs_ptr->tpl_ctrls.tpl_opt_flag ? 0 : rate_estimator(qcoeff, eob, tx_size);
858
859 tpl_stats.srcrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
860 tpl_stats.srcrf_dist = recon_error << (TPL_DEP_COST_SCALE_LOG2);
861 }
862 if (best_mode == NEWMV) {
863 // inter recon with rec_picture as reference pic
864 uint64_t ref_poc = best_ref_poc;
865 uint32_t list_index = best_rf_idx < 4 ? 0 : 1;
866 uint32_t ref_pic_index = best_rf_idx >= 4 ? (best_rf_idx - 4) : best_rf_idx;
867 if (pcs_ptr->tpl_data.ref_in_slide_window[list_index][ref_pic_index]) {
868 uint32_t ref_frame_idx = 0;
869 while (ref_frame_idx < MAX_TPL_LA_SW &&
870 encode_context_ptr->poc_map_idx[ref_frame_idx] != ref_poc)
871 ref_frame_idx++;
872 assert(ref_frame_idx != MAX_TPL_LA_SW);
873 ref_pic_ptr =
874 encode_context_ptr->mc_flow_rec_picture_buffer[ref_frame_idx];
875 } else
876 ref_pic_ptr = (EbPictureBufferDesc *)pcs_ptr->tpl_data
877 .tpl_ref_ds_ptr_array[list_index][ref_pic_index]
878 .picture_ptr;
879 int32_t ref_origin_index = ref_pic_ptr->origin_x +
880 (mb_origin_x + (final_best_mv.col >> 3)) +
881 (mb_origin_y + (final_best_mv.row >> 3) + ref_pic_ptr->origin_y) *
882 ref_pic_ptr->stride_y;
883 for (int i = 0; i < 16; ++i)
884 EB_MEMCPY(dst_buffer + i * dst_buffer_stride,
885 ref_pic_ptr->buffer_y + ref_origin_index +
886 i * ref_pic_ptr->stride_y,
887 sizeof(uint8_t) * (16));
888 } else {
889 // intra recon
890
891 uint8_t *above_row;
892 uint8_t *left_col;
893 DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 32]);
894 DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 32]);
895
896 above_row = above_data + 16;
897 left_col = left_data + 16;
898 uint8_t *recon_buffer = recon_picture_ptr->buffer_y + dst_basic_offset;
899
900 update_neighbor_samples_array_open_loop_mb_recon(
901 1, // use_top_righ_bottom_left
902 1, // update_top_neighbor
903 above_row - 1,
904 left_col - 1,
905 recon_buffer,
906 dst_buffer_stride,
907 mb_origin_x,
908 mb_origin_y,
909 16,
910 16,
911 input_picture_ptr->width,
912 input_picture_ptr->height);
913
914 uint8_t ois_intra_mode = best_intra_mode; // ois_mb_results_ptr->intra_mode;
915 int32_t p_angle = av1_is_directional_mode((PredictionMode)ois_intra_mode)
916 ? mode_to_angle_map[(PredictionMode)ois_intra_mode]
917 : 0;
918 // Edge filter
919 if (av1_is_directional_mode((PredictionMode)ois_intra_mode) &&
920 1 /*scs_ptr->seq_header.enable_intra_edge_filter*/) {
921 filter_intra_edge(NULL,
922 ois_intra_mode,
923 scs_ptr->seq_header.max_frame_width,
924 scs_ptr->seq_header.max_frame_height,
925 p_angle,
926 mb_origin_x,
927 mb_origin_y,
928 above_row,
929 left_col);
930 }
931 // PRED
932 intra_prediction_open_loop_mb(p_angle,
933 ois_intra_mode,
934 mb_origin_x,
935 mb_origin_y,
936 tx_size,
937 above_row,
938 left_col,
939 dst_buffer,
940 dst_buffer_stride);
941 }
942
943 svt_aom_subtract_block(16,
944 16,
945 src_diff,
946 16,
947 src_mb,
948 input_picture_ptr->stride_y,
949 dst_buffer,
950 dst_buffer_stride);
951 EB_TRANS_COEFF_SHAPE pf_shape = pcs_ptr->tpl_ctrls.tpl_opt_flag ? pcs_ptr->tpl_ctrls.pf_shape : DEFAULT_SHAPE;
952 svt_av1_wht_fwd_txfm(src_diff, 16, coeff, tx_size,pf_shape, 8, 0);
953
954 uint16_t eob = 0;
955
956 get_quantize_error(
957 &mb_plane, coeff, qcoeff, dqcoeff, tx_size, &eob, &recon_error, &sse);
958 int rate_cost = pcs_ptr->tpl_ctrls.tpl_opt_flag ? 0 : rate_estimator(qcoeff, eob, tx_size);
959 // Disable intra prediction
960 disable_intra_pred = tpl_opt_flag && (pcs_ptr->tpl_ctrls.disable_intra_pred_nref ||
961 pcs_ptr->tpl_ctrls.disable_intra_pred_nbase);
962 if (!disable_intra_pred || (pcs_ptr->tpl_data.is_used_as_reference_flag))
963 if (eob) {
964 av1_inv_transform_recon8bit((int32_t *)dqcoeff,
965 dst_buffer,
966 dst_buffer_stride,
967 dst_buffer,
968 dst_buffer_stride,
969 TX_16X16,
970 DCT_DCT,
971 PLANE_TYPE_Y,
972 eob,
973 0);
974 }
975
976 tpl_stats.recrf_dist = recon_error << (TPL_DEP_COST_SCALE_LOG2);
977 tpl_stats.recrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
978 if (best_mode != NEWMV) {
979 tpl_stats.srcrf_dist = recon_error << (TPL_DEP_COST_SCALE_LOG2);
980 tpl_stats.srcrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
981 }
982 tpl_stats.recrf_dist = AOMMAX(tpl_stats.srcrf_dist, tpl_stats.recrf_dist);
983 tpl_stats.recrf_rate = AOMMAX(tpl_stats.srcrf_rate, tpl_stats.recrf_rate);
984 if (pcs_ptr->tpl_data.tpl_slice_type != I_SLICE && best_rf_idx != -1) {
985 tpl_stats.mv = final_best_mv;
986 tpl_stats.ref_frame_poc = best_ref_poc;
987 }
988 // Motion flow dependency dispenser.
989 result_model_store(pcs_ptr, &tpl_stats, mb_origin_x, mb_origin_y);
990 }
991 pa_blk_index++;
992 }
993
994 }
995
996 }
997
998 #define TPL_TASKS_MDC_INPUT 0
999 #define TPL_TASKS_ENCDEC_INPUT 1
1000 #define TPL_TASKS_CONTINUE 2
1001 /*
1002 Assign TPL dispenser segments
1003 */
assign_tpl_segments(EncDecSegments * segmentPtr,uint16_t * segmentInOutIndex,TplDispResults * taskPtr,int32_t frame_idx,EbFifo * srmFifoPtr)1004 EbBool assign_tpl_segments(EncDecSegments *segmentPtr, uint16_t *segmentInOutIndex,
1005 TplDispResults * taskPtr,
1006 int32_t frame_idx, EbFifo *srmFifoPtr) {
1007 EbBool continue_processing_flag = EB_FALSE;
1008 uint32_t row_segment_index = 0;
1009 uint32_t segment_index;
1010 uint32_t right_segment_index;
1011 uint32_t bottom_left_segment_index;
1012
1013 int16_t feedback_row_index = -1;
1014
1015 uint32_t self_assigned = EB_FALSE;
1016
1017 //static FILE *trace = 0;
1018 //
1019 //if(trace == 0) {
1020 // trace = fopen("seg-trace.txt","w");
1021 //}
1022
1023 switch (taskPtr->input_type) {
1024 case TPL_TASKS_MDC_INPUT:
1025
1026 // The entire picture is provided by the MDC process, so
1027 // no logic is necessary to clear input dependencies.
1028 for (uint32_t row_index = 0; row_index < segmentPtr->segment_row_count; ++row_index) {
1029 segmentPtr->row_array[row_index].current_seg_index =
1030 segmentPtr->row_array[row_index].starting_seg_index;
1031 }
1032
1033
1034 // Start on Segment 0 immediately
1035 *segmentInOutIndex = segmentPtr->row_array[0].current_seg_index;
1036 taskPtr->input_type = TPL_TASKS_CONTINUE;
1037 ++segmentPtr->row_array[0].current_seg_index;
1038 continue_processing_flag = EB_TRUE;
1039
1040 //fprintf(trace, "Start Pic: %u Seg: %u\n",
1041 // (unsigned) ((PictureControlSet*) taskPtr->pcs_wrapper_ptr->object_ptr)->picture_number,
1042 // *segmentInOutIndex);
1043
1044 break;
1045
1046 case TPL_TASKS_ENCDEC_INPUT:
1047
1048 // Setup row_segment_index to release the in_progress token
1049 //row_segment_index = taskPtr->encDecSegmentRowArray[0];
1050
1051 // Start on the assigned row immediately
1052 *segmentInOutIndex = segmentPtr->row_array[taskPtr->enc_dec_segment_row].current_seg_index;
1053 taskPtr->input_type = TPL_TASKS_CONTINUE;
1054 ++segmentPtr->row_array[taskPtr->enc_dec_segment_row].current_seg_index;
1055 continue_processing_flag = EB_TRUE;
1056
1057 //fprintf(trace, "Start Pic: %u Seg: %u\n",
1058 // (unsigned) ((PictureControlSet*) taskPtr->pcs_wrapper_ptr->object_ptr)->picture_number,
1059 // *segmentInOutIndex);
1060
1061 break;
1062
1063 case TPL_TASKS_CONTINUE:
1064
1065 // Update the Dependency List for Right and Bottom Neighbors
1066 segment_index = *segmentInOutIndex;
1067 row_segment_index = segment_index / segmentPtr->segment_band_count;
1068
1069 right_segment_index = segment_index + 1;
1070 bottom_left_segment_index = segment_index + segmentPtr->segment_band_count;
1071
1072 // Right Neighbor
1073 if (segment_index < segmentPtr->row_array[row_segment_index].ending_seg_index) {
1074 svt_block_on_mutex(segmentPtr->row_array[row_segment_index].assignment_mutex);
1075
1076 --segmentPtr->dep_map.dependency_map[right_segment_index];
1077
1078 if (segmentPtr->dep_map.dependency_map[right_segment_index] == 0) {
1079 *segmentInOutIndex = segmentPtr->row_array[row_segment_index].current_seg_index;
1080 ++segmentPtr->row_array[row_segment_index].current_seg_index;
1081 self_assigned = EB_TRUE;
1082 continue_processing_flag = EB_TRUE;
1083
1084 //fprintf(trace, "Start Pic: %u Seg: %u\n",
1085 // (unsigned) ((PictureControlSet*) taskPtr->pcs_wrapper_ptr->object_ptr)->picture_number,
1086 // *segmentInOutIndex);
1087 }
1088
1089 svt_release_mutex(segmentPtr->row_array[row_segment_index].assignment_mutex);
1090 }
1091
1092 // Bottom-left Neighbor
1093 if (row_segment_index < segmentPtr->segment_row_count - 1 &&
1094 bottom_left_segment_index >=
1095 segmentPtr->row_array[row_segment_index + 1].starting_seg_index) {
1096 svt_block_on_mutex(segmentPtr->row_array[row_segment_index + 1].assignment_mutex);
1097
1098 --segmentPtr->dep_map.dependency_map[bottom_left_segment_index];
1099
1100 if (segmentPtr->dep_map.dependency_map[bottom_left_segment_index] == 0) {
1101 if (self_assigned == EB_TRUE)
1102 feedback_row_index = (int16_t)row_segment_index + 1;
1103 else {
1104 *segmentInOutIndex =
1105 segmentPtr->row_array[row_segment_index + 1].current_seg_index;
1106 ++segmentPtr->row_array[row_segment_index + 1].current_seg_index;
1107 continue_processing_flag = EB_TRUE;
1108
1109 //fprintf(trace, "Start Pic: %u Seg: %u\n",
1110 // (unsigned) ((PictureControlSet*) taskPtr->pcs_wrapper_ptr->object_ptr)->picture_number,
1111 // *segmentInOutIndex);
1112 }
1113 }
1114 svt_release_mutex(segmentPtr->row_array[row_segment_index + 1].assignment_mutex);
1115 }
1116
1117 if (feedback_row_index > 0) {
1118
1119 EbObjectWrapper *out_results_wrapper_ptr;
1120
1121 svt_get_empty_object(
1122 srmFifoPtr ,
1123 &out_results_wrapper_ptr);
1124
1125 TplDispResults *out_results_ptr = (TplDispResults*)out_results_wrapper_ptr->object_ptr;
1126 out_results_ptr->input_type = TPL_TASKS_ENCDEC_INPUT;
1127
1128 out_results_ptr->enc_dec_segment_row = feedback_row_index;
1129 out_results_ptr->tile_group_index = taskPtr->tile_group_index;
1130 out_results_ptr->qIndex = taskPtr->qIndex;
1131
1132 out_results_ptr->pcs_wrapper_ptr = taskPtr->pcs_wrapper_ptr;
1133 out_results_ptr->pcs_ptr = taskPtr->pcs_ptr;
1134 out_results_ptr->frame_index = frame_idx;
1135 svt_post_full_object(out_results_wrapper_ptr);
1136 }
1137
1138 break;
1139
1140 default: break;
1141 }
1142
1143 return continue_processing_flag;
1144 }
1145
1146
1147
1148
1149
1150 /************************************************
1151 * Genrate TPL MC Flow Dispenser Based on Lookahead
1152 ** LAD Window: sliding window size
1153 ************************************************/
1154
1155
tpl_mc_flow_dispenser(EncodeContext * encode_context_ptr,SequenceControlSet * scs_ptr,int32_t * base_rdmult,PictureParentControlSet * pcs_ptr,int32_t frame_idx,SourceBasedOperationsContext * context_ptr)1156 void tpl_mc_flow_dispenser(
1157 EncodeContext *encode_context_ptr,
1158 SequenceControlSet *scs_ptr,
1159 int32_t *base_rdmult,
1160 PictureParentControlSet *pcs_ptr,
1161 int32_t frame_idx,
1162 SourceBasedOperationsContext *context_ptr)
1163 {
1164 EbPictureBufferDesc *recon_picture_ptr = encode_context_ptr->mc_flow_rec_picture_buffer[frame_idx];
1165
1166
1167 int32_t qIndex = quantizer_to_qindex[(uint8_t)scs_ptr->static_config.qp];
1168 if (pcs_ptr->tpl_ctrls.enable_tpl_qps){
1169 const double delta_rate_new[7][6] = {
1170 {1.0, 1.0, 1.0, 1.0, 1.0, 1.0}, // 1L
1171 {0.6, 1.0, 1.0, 1.0, 1.0, 1.0}, // 2L
1172 {0.6, 0.8, 1.0, 1.0, 1.0, 1.0}, // 3L
1173 {0.6, 0.8, 0.9, 1.0, 1.0, 1.0}, // 4L
1174 {0.35, 0.6, 0.8, 0.9, 1.0, 1.0}, //5L
1175 {0.35, 0.6, 0.8, 0.9, 0.95, 1.0} //6L
1176 };
1177 double q_val;
1178 q_val = svt_av1_convert_qindex_to_q(qIndex, 8);
1179 int32_t delta_qindex;
1180 if (pcs_ptr->tpl_data.tpl_slice_type == I_SLICE)
1181 delta_qindex = svt_av1_compute_qdelta(q_val, q_val * 0.25, 8);
1182 else
1183 delta_qindex = svt_av1_compute_qdelta(
1184 q_val,
1185 q_val *
1186 delta_rate_new[pcs_ptr->hierarchical_levels]
1187 [pcs_ptr->tpl_data.tpl_temporal_layer_index],
1188 8);
1189 qIndex = (qIndex + delta_qindex);
1190 }
1191 *base_rdmult = svt_av1_compute_rd_mult_based_on_qindex((AomBitDepth)8/*scs_ptr->static_config.encoder_bit_depth*/, qIndex) / 6;
1192
1193 {
1194 {
1195
1196
1197 // reset number of TPLed sbs per pic
1198 pcs_ptr->tpl_disp_coded_sb_count = 0;
1199
1200 EbObjectWrapper *out_results_wrapper_ptr;
1201
1202 // TPL dispenser kernel
1203 svt_get_empty_object(
1204 context_ptr->sbo_output_fifo_ptr,
1205 &out_results_wrapper_ptr);
1206
1207 TplDispResults *out_results_ptr = (TplDispResults*)out_results_wrapper_ptr->object_ptr;
1208 // out_results_ptr->pcs_wrapper_ptr = pcs_ptr->p_pcs_wrapper_ptr;
1209 out_results_ptr->pcs_ptr = pcs_ptr;
1210 out_results_ptr->input_type = TPL_TASKS_MDC_INPUT;
1211 out_results_ptr->tile_group_index = /*tile_group_idx*/0;
1212
1213 out_results_ptr->frame_index = frame_idx;
1214 out_results_ptr->qIndex = qIndex;
1215
1216 svt_post_full_object(out_results_wrapper_ptr);
1217
1218 svt_block_on_semaphore(pcs_ptr->tpl_disp_done_semaphore); // we can do all in // ?
1219
1220
1221 }
1222 }
1223
1224 // padding current recon picture
1225 generate_padding(recon_picture_ptr->buffer_y,
1226 recon_picture_ptr->stride_y,
1227 recon_picture_ptr->width,
1228 recon_picture_ptr->height,
1229 recon_picture_ptr->origin_x,
1230 recon_picture_ptr->origin_y);
1231
1232 return;
1233 }
1234
1235
get_overlap_area(int grid_pos_row,int grid_pos_col,int ref_pos_row,int ref_pos_col,int block,int bsize)1236 static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row, int ref_pos_col,
1237 int block, int /*BLOCK_SIZE*/ bsize) {
1238 int width = 0, height = 0;
1239 int bw = 4 << mi_size_wide_log2[bsize];
1240 int bh = 4 << mi_size_high_log2[bsize];
1241
1242 switch (block) {
1243 case 0:
1244 width = grid_pos_col + bw - ref_pos_col;
1245 height = grid_pos_row + bh - ref_pos_row;
1246 break;
1247 case 1:
1248 width = ref_pos_col + bw - grid_pos_col;
1249 height = grid_pos_row + bh - ref_pos_row;
1250 break;
1251 case 2:
1252 width = grid_pos_col + bw - ref_pos_col;
1253 height = ref_pos_row + bh - grid_pos_row;
1254 break;
1255 case 3:
1256 width = ref_pos_col + bw - grid_pos_col;
1257 height = ref_pos_row + bh - grid_pos_row;
1258 break;
1259 default: assert(0);
1260 }
1261
1262 return width * height;
1263 }
1264
/*
 * Floor division of ref_pos by bsize_pix (rounds towards negative infinity
 * for a positive bsize_pix), e.g. round_floor(-1, 16) == -1.
 */
static int round_floor(int ref_pos, int bsize_pix) {
    if (ref_pos >= 0)
        return ref_pos / bsize_pix;

    // Negative positions: same value as -(1 + (-ref_pos - 1) / bsize_pix).
    return -1 - (-1 - ref_pos) / bsize_pix;
}
1274
delta_rate_cost(int64_t delta_rate,int64_t recrf_dist,int64_t srcrf_dist,int pix_num)1275 static int64_t delta_rate_cost(int64_t delta_rate, int64_t recrf_dist, int64_t srcrf_dist,
1276 int pix_num) {
1277 double beta = (double)srcrf_dist / recrf_dist;
1278 int64_t rate_cost = delta_rate;
1279
1280 if (srcrf_dist <= 128)
1281 return rate_cost;
1282
1283 double dr = (double)(delta_rate >> (TPL_DEP_COST_SCALE_LOG2 + AV1_PROB_COST_SHIFT)) / pix_num;
1284
1285 double log_den = log(beta) / log(2.0) + 2.0 * dr;
1286
1287 if (log_den > log(10.0) / log(2.0)) {
1288 rate_cost = (int64_t)((log(1.0 / beta) * pix_num) / log(2.0) / 2.0);
1289 rate_cost <<= (TPL_DEP_COST_SCALE_LOG2 + AV1_PROB_COST_SHIFT);
1290 return rate_cost;
1291 }
1292
1293 double num = pow(2.0, log_den);
1294 double den = num * beta + (1 - beta) * beta;
1295
1296 rate_cost = (int64_t)((pix_num * log(num / den)) / log(2.0) / 2.0);
1297
1298 rate_cost <<= (TPL_DEP_COST_SCALE_LOG2 + AV1_PROB_COST_SHIFT);
1299
1300 return rate_cost;
1301 }
1302 /************************************************
1303 * Genrate TPL MC Flow Synthesizer
1304 ************************************************/
1305
1306
tpl_model_update_b(PictureParentControlSet * ref_pcs_ptr,PictureParentControlSet * pcs_ptr,TplStats * tpl_stats_ptr,int mi_row,int mi_col,const int bsize)1307 static AOM_INLINE void tpl_model_update_b(PictureParentControlSet *ref_pcs_ptr, PictureParentControlSet *pcs_ptr,
1308 TplStats *tpl_stats_ptr,
1309 int mi_row, int mi_col,
1310 const int/*BLOCK_SIZE*/ bsize) {
1311 Av1Common *ref_cm = ref_pcs_ptr->av1_cm;
1312 TplStats * ref_tpl_stats_ptr;
1313
1314 const FULLPEL_MV full_mv = get_fullmv_from_mv(&tpl_stats_ptr->mv);
1315 const int ref_pos_row = mi_row * MI_SIZE + full_mv.row;
1316 const int ref_pos_col = mi_col * MI_SIZE + full_mv.col;
1317
1318 const int bw = 4 << mi_size_wide_log2[bsize];
1319 const int bh = 4 << mi_size_high_log2[bsize];
1320 const int mi_height = mi_size_high[bsize];
1321 const int mi_width = mi_size_wide[bsize];
1322 const int pix_num = bw * bh;
1323 const int shift = pcs_ptr->is_720p_or_larger ? 2 : 1;
1324 const int mi_cols_sr = ((ref_pcs_ptr->aligned_width + 15) / 16) << 2;
1325
1326 // top-left on grid block location in pixel
1327 int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh;
1328 int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw;
1329 int block;
1330
1331 int64_t cur_dep_dist = tpl_stats_ptr->recrf_dist - tpl_stats_ptr->srcrf_dist;
1332 int64_t mc_dep_dist = (int64_t)(
1333 tpl_stats_ptr->mc_dep_dist *
1334 ((double)(tpl_stats_ptr->recrf_dist - tpl_stats_ptr->srcrf_dist) /
1335 tpl_stats_ptr->recrf_dist));
1336 int64_t delta_rate = tpl_stats_ptr->recrf_rate - tpl_stats_ptr->srcrf_rate;
1337 int64_t mc_dep_rate = pcs_ptr->tpl_ctrls.tpl_opt_flag ? 0
1338
1339 : delta_rate_cost(tpl_stats_ptr->mc_dep_rate,
1340 tpl_stats_ptr->recrf_dist,
1341 tpl_stats_ptr->srcrf_dist,
1342 pix_num);
1343
1344 for (block = 0; block < 4; ++block) {
1345 int grid_pos_row = grid_pos_row_base + bh * (block >> 1);
1346 int grid_pos_col = grid_pos_col_base + bw * (block & 0x01);
1347
1348 if (grid_pos_row >= 0 && grid_pos_row < ref_cm->mi_rows * MI_SIZE && grid_pos_col >= 0 &&
1349 grid_pos_col < ref_cm->mi_cols * MI_SIZE) {
1350 int overlap_area = get_overlap_area(
1351 grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize);
1352 int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height;
1353 int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width;
1354 const int step = 1 << (pcs_ptr->is_720p_or_larger ? 2 : 1);
1355
1356 for (int idy = 0; idy < mi_height; idy += step) {
1357 for (int idx = 0; idx < mi_width; idx += step) {
1358 ref_tpl_stats_ptr = ref_pcs_ptr->tpl_stats[((ref_mi_row + idy) >> shift) *
1359 (mi_cols_sr >> shift) +
1360 ((ref_mi_col + idx) >> shift)];
1361 ref_tpl_stats_ptr->mc_dep_dist += ((cur_dep_dist + mc_dep_dist) *
1362 overlap_area) /
1363 pix_num;
1364 ref_tpl_stats_ptr->mc_dep_rate += ((delta_rate + mc_dep_rate) * overlap_area) /
1365 pix_num;
1366 assert(overlap_area >= 0);
1367 }
1368 }
1369 }
1370 }
1371 }
1372
1373 /************************************************
1374 * Genrate TPL MC Flow Synthesizer
1375 ************************************************/
1376
1377
tpl_model_update(PictureParentControlSet * pcs_array[MAX_TPL_LA_SW],int32_t frame_idx,int mi_row,int mi_col,const int bsize,uint8_t frames_in_sw)1378 static AOM_INLINE void tpl_model_update(
1379 PictureParentControlSet *pcs_array[MAX_TPL_LA_SW],
1380 int32_t frame_idx, int mi_row, int mi_col,
1381 const int/*BLOCK_SIZE*/ bsize, uint8_t frames_in_sw) {
1382 const int mi_height = mi_size_high[bsize];
1383 const int mi_width = mi_size_wide[bsize];
1384 PictureParentControlSet *pcs_ptr = pcs_array[frame_idx];
1385 const int /*BLOCK_SIZE*/ block_size = pcs_ptr->is_720p_or_larger ? BLOCK_16X16 : BLOCK_8X8;
1386 const int step = 1 << (pcs_ptr->is_720p_or_larger ? 2 : 1);
1387 const int shift = pcs_ptr->is_720p_or_larger ? 2 : 1;
1388 const int mi_cols_sr = ((pcs_ptr->aligned_width + 15) / 16) << 2;
1389 int i = 0;
1390
1391 for (int idy = 0; idy < mi_height; idy += step) {
1392 for (int idx = 0; idx < mi_width; idx += step) {
1393 TplStats *tpl_stats_ptr =
1394 pcs_ptr->tpl_stats[(((mi_row + idy) >> shift) * (mi_cols_sr >> shift)) +
1395 ((mi_col + idx) >> shift)];
1396
1397 while (i < frames_in_sw && pcs_array[i]->picture_number != tpl_stats_ptr->ref_frame_poc)
1398 i++;
1399 if (i < frames_in_sw)
1400 tpl_model_update_b(
1401 pcs_array[i], pcs_ptr, tpl_stats_ptr, mi_row + idy, mi_col + idx, block_size);
1402 }
1403 }
1404 }
1405
1406
1407
1408 /************************************************
1409 * Genrate TPL MC Flow Synthesizer Based on Lookahead
1410 ** LAD Window: sliding window size
1411 ************************************************/
1412
1413
tpl_mc_flow_synthesizer(PictureParentControlSet * pcs_array[MAX_TPL_LA_SW],int32_t frame_idx,uint8_t frames_in_sw)1414 void tpl_mc_flow_synthesizer(
1415 PictureParentControlSet *pcs_array[MAX_TPL_LA_SW],
1416 int32_t frame_idx,
1417 uint8_t frames_in_sw)
1418 {
1419 Av1Common * cm = pcs_array[frame_idx]->av1_cm;
1420 const int /*BLOCK_SIZE*/ bsize = BLOCK_16X16;
1421 const int mi_height = mi_size_high[bsize];
1422 const int mi_width = mi_size_wide[bsize];
1423
1424 for (int mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
1425 for (int mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
1426 tpl_model_update(pcs_array, frame_idx, mi_row, mi_col, bsize, frames_in_sw);
1427 }
1428 }
1429 return;
1430 }
1431
1432
/*
 * Derives the frame-level r0 and the per-superblock beta factors from the TPL
 * stats stored in pcs_ptr.
 *
 * Frame level: sums recrf_dist (scaled by RDDIV_BITS) over all stats entries,
 * once on its own and once together with the RD cost of the propagated
 * mc_dep_rate / mc_dep_dist. r0 is the ratio of the two sums; tpl_is_valid is
 * set to 1 when the second sum is non-zero and to 0 otherwise (r0 is then left
 * untouched). The second sum is also handed to generate_lambda_scaling_factor().
 *
 * Superblock level: the same two sums are formed per SB, and
 * tpl_beta[sb] = r0 / (intra_cost / mc_dep_cost), or 1.0 when either sum is
 * not positive.
 */
static void generate_r0beta(PictureParentControlSet *pcs_ptr) {
    const Av1Common    *cm      = pcs_ptr->av1_cm;
    SequenceControlSet *scs_ptr = pcs_ptr->scs_ptr;

    const int shift        = pcs_ptr->is_720p_or_larger ? 2 : 1;
    const int step         = 1 << shift;
    const int mi_cols_sr   = ((pcs_ptr->aligned_width + 15) / 16) << 2;
    const int stats_stride = mi_cols_sr >> shift;

    // ---- frame-level accumulation ----
    int64_t intra_cost_base  = 0;
    int64_t mc_dep_cost_base = 0;

    for (int row = 0; row < cm->mi_rows; row += step) {
        for (int col = 0; col < mi_cols_sr; col += step) {
            TplStats *stats = pcs_ptr->tpl_stats[(row >> shift) * stats_stride + (col >> shift)];
            const int64_t mc_dep_delta = RDCOST(
                pcs_ptr->base_rdmult, stats->mc_dep_rate, stats->mc_dep_dist);
            const int64_t scaled_dist = (stats->recrf_dist << RDDIV_BITS);

            intra_cost_base += scaled_dist;
            mc_dep_cost_base += scaled_dist + mc_dep_delta;
        }
    }

    if (mc_dep_cost_base == 0) {
        pcs_ptr->tpl_is_valid = 0;
    } else {
        pcs_ptr->r0           = (double)intra_cost_base / mc_dep_cost_base;
        pcs_ptr->tpl_is_valid = 1;
    }

#if DEBUG_TPL
    SVT_LOG("generate_r0beta ------> poc %ld\t%.0f\t%.0f \t%.5f base_rdmult=%d\n",
            pcs_ptr->picture_number,
            (double)intra_cost_base,
            (double)mc_dep_cost_base,
            pcs_ptr->r0,
            pcs_ptr->base_rdmult);
#endif
    generate_lambda_scaling_factor(pcs_ptr, mc_dep_cost_base);

    // ---- per-superblock beta ----
    const int blk_log2 = 3 + pcs_ptr->is_720p_or_larger; // log2 of the stats block size in pixels
    const int blk_to_mb_log2 = 1 - pcs_ptr->is_720p_or_larger; // stats blocks -> 16x16 units

    const uint32_t sb_sz = scs_ptr->seq_header.sb_size == BLOCK_128X128 ? 128 : 64;
    const uint32_t picture_sb_width = (uint32_t)((scs_ptr->seq_header.max_frame_width + sb_sz - 1) /
                                                 sb_sz);
    const uint32_t picture_sb_height = (uint32_t)(
        (scs_ptr->seq_header.max_frame_height + sb_sz - 1) / sb_sz);
    const uint32_t picture_width_in_mb  = (scs_ptr->seq_header.max_frame_width + 16 - 1) / 16;
    const uint32_t picture_height_in_mb = (scs_ptr->seq_header.max_frame_height + 16 - 1) / 16;
    const uint32_t blks                 = sb_sz >> blk_log2; // stats blocks per SB side

    for (uint32_t sb_y = 0; sb_y < picture_sb_height; ++sb_y) {
        const uint32_t blky_first = (sb_y * sb_sz) >> blk_log2;

        for (uint32_t sb_x = 0; sb_x < picture_sb_width; ++sb_x) {
            const uint32_t blkx_first  = (sb_x * sb_sz) >> blk_log2;
            int64_t        intra_cost  = 0;
            int64_t        mc_dep_cost = 0;

            for (uint32_t dy = 0; dy < blks; dy++) {
                const uint32_t blky = blky_first + dy;
                // rows below the picture contribute nothing
                if ((blky >> blk_to_mb_log2) >= picture_height_in_mb)
                    continue;

                for (uint32_t dx = 0; dx < blks; dx++) {
                    const uint32_t blkx = blkx_first + dx;
                    // columns right of the picture contribute nothing
                    if ((blkx >> blk_to_mb_log2) >= picture_width_in_mb)
                        continue;

                    TplStats *stats = pcs_ptr->tpl_stats[blky * (mi_cols_sr >> shift) + blkx];
                    const int64_t mc_dep_delta = RDCOST(
                        pcs_ptr->base_rdmult, stats->mc_dep_rate, stats->mc_dep_dist);
                    const int64_t scaled_dist = (stats->recrf_dist << RDDIV_BITS);

                    intra_cost += scaled_dist;
                    mc_dep_cost += scaled_dist + mc_dep_delta;
                }
            }

            double beta = 1.0;
            if (mc_dep_cost > 0 && intra_cost > 0) {
                const double rk = (double)intra_cost / mc_dep_cost;
                beta            = (pcs_ptr->r0 / rk);
                assert(beta > 0.0);
            }
            pcs_ptr->tpl_beta[sb_y * picture_sb_width + sb_x] = beta;
        }
    }
}
1514 /************************************************
1515 * Allocate and initialize buffers needed for tpl
1516 ************************************************/
init_tpl_buffers(EncodeContext * encode_context_ptr,PictureParentControlSet * pcs_ptr)1517 EbErrorType init_tpl_buffers(
1518 EncodeContext *encode_context_ptr,
1519 PictureParentControlSet *pcs_ptr){
1520 int32_t frames_in_sw = MIN(MAX_TPL_LA_SW, pcs_ptr->tpl_group_size);
1521 int32_t frame_idx;
1522
1523 for (frame_idx = 0; frame_idx < MAX_TPL_LA_SW; frame_idx++) {
1524 encode_context_ptr->poc_map_idx[frame_idx] = -1;
1525 encode_context_ptr->mc_flow_rec_picture_buffer[frame_idx] = NULL;
1526 }
1527 EbPictureBufferDescInitData picture_buffer_desc_init_data;
1528 picture_buffer_desc_init_data.max_width = pcs_ptr->enhanced_picture_ptr->max_width;
1529 picture_buffer_desc_init_data.max_height = pcs_ptr->enhanced_picture_ptr->max_height;
1530 picture_buffer_desc_init_data.bit_depth = pcs_ptr->enhanced_picture_ptr->bit_depth;
1531 picture_buffer_desc_init_data.color_format = pcs_ptr->enhanced_picture_ptr->color_format;
1532 picture_buffer_desc_init_data.buffer_enable_mask = PICTURE_BUFFER_DESC_Y_FLAG;
1533 picture_buffer_desc_init_data.left_padding = TPL_PADX;
1534 picture_buffer_desc_init_data.right_padding = TPL_PADX;
1535 picture_buffer_desc_init_data.top_padding = TPL_PADY;
1536 picture_buffer_desc_init_data.bot_padding = TPL_PADY;
1537 picture_buffer_desc_init_data.split_mode = EB_FALSE;
1538
1539 EB_NEW(encode_context_ptr->mc_flow_rec_picture_buffer_noref,
1540 svt_picture_buffer_desc_ctor,
1541 (EbPtr)&picture_buffer_desc_init_data);
1542
1543 for (frame_idx = 0; frame_idx < frames_in_sw; frame_idx++) {
1544 if (pcs_ptr->tpl_valid_pic[frame_idx]) {
1545 EB_NEW(encode_context_ptr->mc_flow_rec_picture_buffer[frame_idx],
1546 svt_picture_buffer_desc_ctor,
1547 (EbPtr)&picture_buffer_desc_init_data);
1548 } else {
1549 encode_context_ptr->mc_flow_rec_picture_buffer[frame_idx] =
1550 encode_context_ptr->mc_flow_rec_picture_buffer_noref;
1551 }
1552 }
1553 return EB_ErrorNone;
1554 }
1555
1556
1557
1558
1559 /************************************************
1560 * init tpl tpl_disp_segment_ctrl
1561 ************************************************/
init_tpl_segments(SequenceControlSet * scs_ptr,PictureParentControlSet * pcs_ptr,PictureParentControlSet ** pcs_array,int32_t frames_in_sw)1562 void init_tpl_segments(
1563 SequenceControlSet *scs_ptr,
1564 PictureParentControlSet *pcs_ptr,
1565 PictureParentControlSet **pcs_array,
1566 int32_t frames_in_sw) {
1567
1568 for (int32_t frame_idx = 0; frame_idx < frames_in_sw; frame_idx++) {
1569 uint32_t enc_dec_seg_col_cnt = scs_ptr->tpl_segment_col_count_array;
1570 uint32_t enc_dec_seg_row_cnt = scs_ptr->tpl_segment_row_count_array;
1571
1572 const int tile_cols = pcs_ptr->av1_cm->tiles_info.tile_cols;
1573 const int tile_rows = pcs_ptr->av1_cm->tiles_info.tile_rows;
1574 uint8_t tile_group_cols = MIN(
1575 tile_cols,
1576 scs_ptr->tile_group_col_count_array[pcs_ptr->temporal_layer_index]);
1577 uint8_t tile_group_rows = MIN(
1578 tile_rows,
1579 scs_ptr->tile_group_row_count_array[pcs_ptr->temporal_layer_index]);
1580
1581 // Valid when only one tile used
1582 // TPL segments + tiles (not working)
1583 // TPL segments are 64x64 SB based
1584 uint16_t pic_width_in_sb;
1585 uint16_t pic_height_in_sb;
1586 pic_width_in_sb = (pcs_ptr->aligned_width + scs_ptr->sb_sz - 1) / scs_ptr->sb_sz;
1587 pic_height_in_sb = (pcs_ptr->aligned_height + scs_ptr->sb_sz - 1) / scs_ptr->sb_sz;
1588
1589 if (tile_group_cols * tile_group_rows > 1) {
1590 enc_dec_seg_col_cnt = MIN(enc_dec_seg_col_cnt,
1591 (uint8_t)(pic_width_in_sb / tile_group_cols));
1592 enc_dec_seg_row_cnt = MIN(
1593 enc_dec_seg_row_cnt,
1594 (uint8_t)(pic_height_in_sb / tile_group_rows));
1595 }
1596 // Init segments within the tile group
1597 int sb_size_log2 = scs_ptr->seq_header.sb_size_log2;
1598
1599 uint8_t tile_group_col_start_tile_idx[1024];
1600 uint8_t tile_group_row_start_tile_idx[1024];
1601
1602 // Get the tile start index for tile group
1603 for (uint8_t c = 0; c <= tile_group_cols; c++) {
1604 tile_group_col_start_tile_idx[c] = c * tile_cols / tile_group_cols;
1605 }
1606 for (uint8_t r = 0; r <= tile_group_rows; r++) {
1607 tile_group_row_start_tile_idx[r] = r * tile_rows / tile_group_rows;
1608 }
1609
1610 for (uint8_t r = 0; r < tile_group_rows; r++) {
1611 for (uint8_t c = 0; c < tile_group_cols; c++) {
1612 uint16_t tile_group_idx = r * tile_group_cols + c;
1613 uint16_t top_left_tile_col_idx = tile_group_col_start_tile_idx[c];
1614 uint16_t top_left_tile_row_idx = tile_group_row_start_tile_idx[r];
1615 uint16_t bottom_right_tile_col_idx =
1616 tile_group_col_start_tile_idx[c + 1];
1617 uint16_t bottom_right_tile_row_idx =
1618 tile_group_row_start_tile_idx[r + 1];
1619
1620 TileGroupInfo *tg_info_ptr =
1621 &pcs_array[frame_idx]->tile_group_info[tile_group_idx];
1622
1623 tg_info_ptr->tile_group_tile_start_x = top_left_tile_col_idx;
1624 tg_info_ptr->tile_group_tile_end_x = bottom_right_tile_col_idx;
1625
1626 tg_info_ptr->tile_group_tile_start_y = top_left_tile_row_idx;
1627 tg_info_ptr->tile_group_tile_end_y = bottom_right_tile_row_idx;
1628
1629 tg_info_ptr->tile_group_sb_start_x =
1630 pcs_ptr->av1_cm->tiles_info.tile_col_start_mi[top_left_tile_col_idx] >>
1631 sb_size_log2;
1632 tg_info_ptr->tile_group_sb_start_y =
1633 pcs_ptr->av1_cm->tiles_info.tile_row_start_mi[top_left_tile_row_idx] >>
1634 sb_size_log2;
1635
1636
1637
1638
1639 // Get the SB end of the bottom right tile
1640 tg_info_ptr->tile_group_sb_end_x = pic_width_in_sb ;
1641 //(pcs_ptr->av1_cm->tiles_info.tile_col_start_mi[bottom_right_tile_col_idx] >>
1642 // sb_size_log2);
1643 tg_info_ptr->tile_group_sb_end_y = pic_height_in_sb;
1644 //(pcs_ptr->av1_cm->tiles_info.tile_row_start_mi[bottom_right_tile_row_idx] >>
1645 // sb_size_log2);
1646
1647 // Get the width/height of tile group in SB
1648 tg_info_ptr->tile_group_height_in_sb =
1649 tg_info_ptr->tile_group_sb_end_y -
1650 tg_info_ptr->tile_group_sb_start_y;
1651 tg_info_ptr->tile_group_width_in_sb =
1652 tg_info_ptr->tile_group_sb_end_x -
1653 tg_info_ptr->tile_group_sb_start_x;
1654
1655 enc_dec_segments_init(
1656 pcs_array[frame_idx]->tpl_disp_segment_ctrl[tile_group_idx],
1657 enc_dec_seg_col_cnt,
1658 enc_dec_seg_row_cnt,
1659 tg_info_ptr->tile_group_width_in_sb,
1660 tg_info_ptr->tile_group_height_in_sb);
1661 }
1662 }
1663 }
1664 }
1665
1666
1667 /************************************************
1668 * Genrate TPL MC Flow Based on frames in the tpl group
1669 ************************************************/
tpl_mc_flow(EncodeContext * encode_context_ptr,SequenceControlSet * scs_ptr,PictureParentControlSet * pcs_ptr,SourceBasedOperationsContext * context_ptr)1670 EbErrorType tpl_mc_flow(EncodeContext *encode_context_ptr, SequenceControlSet *scs_ptr,
1671 PictureParentControlSet *pcs_ptr, SourceBasedOperationsContext *context_ptr) {
1672
1673 int32_t frames_in_sw = MIN(MAX_TPL_LA_SW, pcs_ptr->tpl_group_size);
1674 int32_t frame_idx;
1675 uint32_t shift = pcs_ptr->is_720p_or_larger ? 0 : 1;
1676 uint32_t picture_width_in_mb = (pcs_ptr->enhanced_picture_ptr->width + 16 - 1) / 16;
1677 uint32_t picture_height_in_mb = (pcs_ptr->enhanced_picture_ptr->height + 16 - 1) / 16;
1678
1679 //wait for PA ME to be done.
1680 for (uint32_t i = 1; i < pcs_ptr->tpl_group_size; i++) {
1681 svt_wait_cond_var(&pcs_ptr->tpl_group[i]->me_ready, 0);
1682 }
1683 pcs_ptr->tpl_is_valid = 0;
1684 init_tpl_buffers(encode_context_ptr, pcs_ptr);
1685
1686 if (pcs_ptr->tpl_group[0]->tpl_data.tpl_temporal_layer_index == 0) {
1687
1688
1689 // no Tiles path
1690 if (scs_ptr->static_config.tile_rows == 0 && scs_ptr->static_config.tile_columns == 0 )
1691 init_tpl_segments(
1692 scs_ptr,
1693 pcs_ptr,
1694 pcs_ptr->tpl_group,
1695 frames_in_sw) ;
1696
1697
1698
1699 uint8_t tpl_on;
1700 encode_context_ptr->poc_map_idx[0] = pcs_ptr->tpl_group[0]->picture_number;
1701 for (frame_idx = 0; frame_idx < frames_in_sw; frame_idx++) {
1702 encode_context_ptr->poc_map_idx[frame_idx] = pcs_ptr->tpl_group[frame_idx]->picture_number;
1703 for (uint32_t blky = 0; blky < (picture_height_in_mb << shift); blky++) {
1704 memset(pcs_ptr->tpl_group[frame_idx]->tpl_stats[blky * (picture_width_in_mb << shift)],
1705 0,
1706 (picture_width_in_mb << shift) * sizeof(TplStats));
1707 }
1708 if(scs_ptr->lad_mg)
1709 tpl_on = pcs_ptr->tpl_valid_pic[frame_idx];
1710 else {
1711 tpl_on = !(pcs_ptr->tpl_group[0]->tpl_ctrls.disable_tpl_nref);
1712 tpl_on = (pcs_ptr->tpl_group[0]->slice_type == I_SLICE) ? 1 : tpl_on;
1713 if (tpl_on == 0) {
1714 tpl_on = pcs_ptr->tpl_group[frame_idx]->tpl_data.is_used_as_reference_flag ? 1 :
1715 (ABS((int64_t)pcs_ptr->tpl_group[0]->picture_number -
1716 (int64_t)pcs_ptr->tpl_group[frame_idx]->picture_number)
1717 <= pcs_ptr->tpl_group[0]->tpl_ctrls.disable_tpl_pic_dist) ? 1 : tpl_on;
1718 }
1719 }
1720 if (tpl_on)
1721 tpl_mc_flow_dispenser(encode_context_ptr, scs_ptr, &pcs_ptr->base_rdmult, pcs_ptr->tpl_group[frame_idx], frame_idx,context_ptr);
1722 }
1723
1724 // synthesizer
1725 for (frame_idx = frames_in_sw - 1; frame_idx >= 0; frame_idx--) {
1726 if(scs_ptr->lad_mg)
1727 tpl_on = pcs_ptr->tpl_valid_pic[frame_idx];
1728 else {
1729 tpl_on = !(pcs_ptr->tpl_group[0]->tpl_ctrls.disable_tpl_nref);
1730 tpl_on = (pcs_ptr->tpl_group[0]->slice_type == I_SLICE) ? 1 : tpl_on;
1731 if (tpl_on == 0) {
1732 tpl_on = pcs_ptr->tpl_group[frame_idx]->tpl_data.is_used_as_reference_flag ? 1 :
1733 (ABS((int64_t)pcs_ptr->tpl_group[0]->picture_number -
1734 (int64_t)pcs_ptr->tpl_group[frame_idx]->picture_number)
1735 <= pcs_ptr->tpl_group[0]->tpl_ctrls.disable_tpl_pic_dist) ? 1 : tpl_on;
1736 }
1737 }
1738 if (tpl_on)
1739 tpl_mc_flow_synthesizer(pcs_ptr->tpl_group, frame_idx, frames_in_sw);
1740 }
1741
1742 // generate tpl stats
1743 generate_r0beta(pcs_ptr);
1744 #if DEBUG_TPL
1745 SVT_LOG("LOG displayorder:%ld\n",
1746 pcs_array[0]->picture_number);
1747 for (frame_idx = 0; frame_idx < frames_in_sw; frame_idx++)
1748 {
1749 PictureParentControlSet *pcs_ptr_tmp = pcs_array[frame_idx];
1750 Av1Common *cm = pcs_ptr->av1_cm;
1751 SequenceControlSet *scs_ptr = pcs_ptr_tmp->scs_ptr;
1752 int64_t intra_cost_base = 0;
1753 int64_t mc_dep_cost_base = 0;
1754 const int step = 1 << (pcs_ptr_tmp->is_720p_or_larger ? 2 : 1);
1755 const int mi_cols_sr = ((pcs_ptr_tmp->aligned_width + 15) / 16) << 2;
1756 const int shift = pcs_ptr_tmp->is_720p_or_larger ? 2 : 1;
1757
1758 for (int row = 0; row < cm->mi_rows; row += step) {
1759 for (int col = 0; col < mi_cols_sr; col += step) {
1760 TplStats *tpl_stats_ptr = pcs_ptr_tmp->tpl_stats[(row >> shift) * (mi_cols_sr >> shift) + (col >> shift)];
1761 int64_t mc_dep_delta =
1762 RDCOST(pcs_ptr->base_rdmult, tpl_stats_ptr->mc_dep_rate, tpl_stats_ptr->mc_dep_dist);
1763 intra_cost_base += (tpl_stats_ptr->recrf_dist << RDDIV_BITS);
1764 mc_dep_cost_base += (tpl_stats_ptr->recrf_dist << RDDIV_BITS) + mc_dep_delta;
1765 }
1766 }
1767
1768 SVT_LOG("After mc_flow_synthesizer:\tframe_indx:%d\tdisplayorder:%ld\tIntra:%lld\tmc_dep:%lld rdmult:%i\n",
1769 frame_idx, pcs_ptr_tmp->picture_number, intra_cost_base, mc_dep_cost_base, pcs_ptr->base_rdmult);
1770 }
1771 #endif
1772
1773
1774
1775 }
1776
1777 for (frame_idx = 0; frame_idx < frames_in_sw; frame_idx++) {
1778 if (encode_context_ptr->mc_flow_rec_picture_buffer[frame_idx] &&
1779 encode_context_ptr->mc_flow_rec_picture_buffer[frame_idx] !=
1780 encode_context_ptr->mc_flow_rec_picture_buffer_noref)
1781 EB_DELETE(encode_context_ptr->mc_flow_rec_picture_buffer[frame_idx]);
1782 }
1783 EB_DELETE(encode_context_ptr->mc_flow_rec_picture_buffer_noref);
1784
1785 for (uint32_t i = 0; i < pcs_ptr->tpl_group_size; i++) {
1786 if (pcs_ptr->tpl_group[i]->slice_type == P_SLICE) {
1787 if (pcs_ptr->tpl_group[i]->ext_mg_id == pcs_ptr->ext_mg_id + 1)
1788 release_pa_reference_objects(scs_ptr, pcs_ptr->tpl_group[i]);
1789 }
1790 else {
1791 if (pcs_ptr->tpl_group[i]->ext_mg_id == pcs_ptr->ext_mg_id)
1792 release_pa_reference_objects(scs_ptr, pcs_ptr->tpl_group[i]);
1793 }
1794 if (pcs_ptr->tpl_group[i]->non_tf_input)
1795 EB_DELETE(pcs_ptr->tpl_group[i]->non_tf_input);
1796 }
1797
1798 return EB_ErrorNone;
1799 }
1800
1801
1802 /*
1803 TPL dispenser kernel
1804 process one picture of TPL group
1805 */
1806
1807
tpl_disp_kernel(void * input_ptr)1808 void *tpl_disp_kernel(void *input_ptr) {
1809 EbThreadContext * thread_context_ptr = (EbThreadContext *)input_ptr;
1810 TplDispenserContext *context_ptr =
1811 (TplDispenserContext *)thread_context_ptr->priv;
1812 EbObjectWrapper * in_results_wrapper_ptr;
1813 TplDispResults *in_results_ptr;
1814 for (;;) {
1815 // Get Input Full Object
1816 EB_GET_FULL_OBJECT(context_ptr->tpl_disp_input_fifo_ptr,
1817 &in_results_wrapper_ptr);
1818
1819 in_results_ptr = (TplDispResults *)in_results_wrapper_ptr->object_ptr;
1820
1821 PictureParentControlSet* pcs_ptr = in_results_ptr->pcs_ptr;
1822
1823 SequenceControlSet* scs_ptr = (SequenceControlSet *)pcs_ptr->scs_ptr;
1824
1825 int32_t frame_idx =in_results_ptr->frame_index;
1826 context_ptr->coded_sb_count = 0;
1827
1828 uint16_t tile_group_width_in_sb = pcs_ptr->tile_group_info[0/*context_ptr->tile_group_index*/] // 1 tile
1829 .tile_group_width_in_sb;
1830 EncDecSegments *segments_ptr;
1831
1832 segments_ptr = pcs_ptr->tpl_disp_segment_ctrl[0/*context_ptr->tile_group_index*/]; // 1 tile
1833 // Segments
1834 uint16_t segment_index;
1835
1836 uint8_t sb_sz = (uint8_t)scs_ptr->sb_sz ;
1837 uint8_t sb_size_log2 = (uint8_t)svt_log2f(sb_sz);
1838 uint32_t pic_width_in_sb = (pcs_ptr->aligned_width + sb_sz - 1) >> sb_size_log2;
1839
1840 segment_index = 0;
1841 // no Tiles path
1842 if (scs_ptr->static_config.tile_rows == 0 && scs_ptr->static_config.tile_columns == 0 ){
1843 // segments loop
1844 while (
1845 assign_tpl_segments(
1846 segments_ptr,
1847 &segment_index,
1848 in_results_ptr,
1849 frame_idx,
1850 context_ptr->tpl_disp_fb_fifo_ptr)
1851 == EB_TRUE) {
1852
1853 uint32_t x_sb_start_index;
1854 uint32_t y_sb_start_index;
1855 uint32_t sb_start_index;
1856 uint32_t sb_segment_count;
1857 uint32_t sb_segment_index;
1858 uint32_t segment_row_index;
1859 uint32_t segment_band_index;
1860 uint32_t segment_band_size;
1861 // SB Loop variables
1862 uint32_t x_sb_index;
1863 uint32_t y_sb_index;
1864
1865 x_sb_start_index = segments_ptr->x_start_array[segment_index];
1866 y_sb_start_index = segments_ptr->y_start_array[segment_index];
1867 sb_start_index = y_sb_start_index * tile_group_width_in_sb + x_sb_start_index;
1868 sb_segment_count = segments_ptr->valid_sb_count_array[segment_index];
1869
1870 segment_row_index = segment_index / segments_ptr->segment_band_count;
1871 segment_band_index =
1872 segment_index - segment_row_index * segments_ptr->segment_band_count;
1873 segment_band_size = (segments_ptr->sb_band_count * (segment_band_index + 1) +
1874 segments_ptr->segment_band_count - 1) /
1875 segments_ptr->segment_band_count;
1876
1877
1878 for (y_sb_index = y_sb_start_index, sb_segment_index = sb_start_index;
1879 sb_segment_index < sb_start_index + sb_segment_count;
1880 ++y_sb_index) {
1881 for (x_sb_index = x_sb_start_index;
1882 x_sb_index < tile_group_width_in_sb &&
1883 (x_sb_index + y_sb_index < segment_band_size) &&
1884 sb_segment_index < sb_start_index + sb_segment_count;
1885 ++x_sb_index, ++sb_segment_index) {
1886 uint16_t tile_group_y_sb_start =
1887 pcs_ptr->tile_group_info[0/*context_ptr->tile_group_index*/] // 1 tile
1888 .tile_group_sb_start_y;
1889 uint16_t tile_group_x_sb_start =
1890 pcs_ptr->tile_group_info[0/*context_ptr->tile_group_index*/] // 1 tile
1891 .tile_group_sb_start_x;
1892
1893 context_ptr->sb_index = (uint16_t)((y_sb_index + tile_group_y_sb_start) * pic_width_in_sb +
1894 x_sb_index + tile_group_x_sb_start);
1895
1896 // TPL dispenser per SB (64)
1897 tpl_mc_flow_dispenser_sb(
1898 pcs_ptr->scs_ptr->encode_context_ptr,
1899 scs_ptr,
1900 pcs_ptr,
1901 frame_idx,
1902 context_ptr->sb_index,
1903 in_results_ptr->qIndex);
1904
1905 context_ptr->coded_sb_count++;
1906
1907 }
1908
1909 x_sb_start_index = (x_sb_start_index > 0) ? x_sb_start_index - 1 : 0;
1910 }
1911 }
1912
1913 svt_block_on_mutex(pcs_ptr->tpl_disp_mutex);
1914 pcs_ptr->tpl_disp_coded_sb_count += (uint32_t)context_ptr->coded_sb_count;
1915 EbBool last_sb_flag = (pcs_ptr->sb_total_count == pcs_ptr->tpl_disp_coded_sb_count);
1916
1917 svt_release_mutex(pcs_ptr->tpl_disp_mutex);
1918 if (last_sb_flag)
1919 svt_post_semaphore(pcs_ptr->tpl_disp_done_semaphore);
1920 }
1921 else {
1922 // Tiles path does not suupport segments
1923 for (uint32_t sb_index = 0; sb_index < pcs_ptr->sb_total_count; ++sb_index) {
1924
1925 tpl_mc_flow_dispenser_sb(
1926 pcs_ptr->scs_ptr->encode_context_ptr,
1927 scs_ptr,
1928 pcs_ptr,
1929 frame_idx,
1930 sb_index,
1931 in_results_ptr->qIndex);
1932 }
1933 svt_post_semaphore(pcs_ptr->tpl_disp_done_semaphore);
1934
1935 }
1936 svt_release_object(in_results_wrapper_ptr);
1937
1938 }
1939 return NULL;
1940 }
1941
1942
1943
1944
1945 /************************************************
1946 * Source Based Operations Kernel
1947 * Source-based operations process involves a number of analysis algorithms
1948 * to identify spatiotemporal characteristics of the input pictures.
1949 ************************************************/
source_based_operations_kernel(void * input_ptr)1950 void *source_based_operations_kernel(void *input_ptr) {
1951 EbThreadContext * thread_context_ptr = (EbThreadContext *)input_ptr;
1952 SourceBasedOperationsContext *context_ptr = (SourceBasedOperationsContext *)
1953 thread_context_ptr->priv;
1954 PictureParentControlSet * pcs_ptr;
1955 EbObjectWrapper * in_results_wrapper_ptr;
1956 InitialRateControlResults *in_results_ptr;
1957 EbObjectWrapper * out_results_wrapper_ptr;
1958
1959 for (;;) {
1960 // Get Input Full Object
1961 EB_GET_FULL_OBJECT(context_ptr->initial_rate_control_results_input_fifo_ptr,
1962 &in_results_wrapper_ptr);
1963
1964 in_results_ptr = (InitialRateControlResults *)in_results_wrapper_ptr->object_ptr;
1965 pcs_ptr = (PictureParentControlSet *)in_results_ptr->pcs_wrapper_ptr->object_ptr;
1966 context_ptr->complete_sb_count = 0;
1967 uint32_t sb_total_count = pcs_ptr->sb_total_count;
1968 uint32_t sb_index;
1969
1970 SequenceControlSet *scs_ptr = (SequenceControlSet *)pcs_ptr->scs_wrapper_ptr->object_ptr;
1971 // Get TPL ME
1972
1973 if (scs_ptr->static_config.enable_tpl_la) {
1974
1975 if (scs_ptr->static_config.enable_tpl_la &&
1976 pcs_ptr->temporal_layer_index == 0) {
1977
1978 tpl_prep_info(pcs_ptr);
1979 tpl_mc_flow(scs_ptr->encode_context_ptr, scs_ptr, pcs_ptr,context_ptr);
1980 }
1981 }
1982
1983 /***********************************************SB-based operations************************************************************/
1984 for (sb_index = 0; sb_index < sb_total_count; ++sb_index) {
1985 SbParams *sb_params = &pcs_ptr->sb_params_array[sb_index];
1986 EbBool is_complete_sb = sb_params->is_complete_sb;
1987 if (is_complete_sb) {
1988 context_ptr->complete_sb_count++;
1989 }
1990 }
1991 /*********************************************Picture-based operations**********************************************************/
1992
1993 // Activity statistics derivation
1994 derive_picture_activity_statistics(pcs_ptr);
1995
1996 // Get Empty Results Object
1997 svt_get_empty_object(context_ptr->picture_demux_results_output_fifo_ptr,
1998 &out_results_wrapper_ptr);
1999
2000 PictureDemuxResults *out_results_ptr = (PictureDemuxResults *)
2001 out_results_wrapper_ptr->object_ptr;
2002 out_results_ptr->pcs_wrapper_ptr = in_results_ptr->pcs_wrapper_ptr;
2003 out_results_ptr->picture_type = EB_PIC_INPUT;
2004
2005 // Release the Input Results
2006 svt_release_object(in_results_wrapper_ptr);
2007
2008 // Post the Full Results Object
2009 svt_post_full_object(out_results_wrapper_ptr);
2010 }
2011 return NULL;
2012 }
2013