1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <limits.h>
13 #include <math.h>
14 #include <stdio.h>
15
16 #include "aom_dsp_rtcd.h"
17 #include "EbDefinitions.h"
18 #include "EbRateControlProcess.h"
19 #include "EbSequenceControlSet.h"
20 #include "EbPictureControlSet.h"
21 #include "firstpass.h"
22 #include "EbLog.h"
23 #include "EbModeDecisionProcess.h"
24 #include "EbCodingLoop.h"
25 #include "dwt.h" // to move to firstpass.c
26 #include "EbPictureDecisionProcess.h"
27 #include "EbModeDecisionConfigurationProcess.h"
28 #include "mv.h"
29 #ifdef ARCH_X86_64
30 #include <xmmintrin.h>
31 #endif
32 #include "EbMotionEstimation.h"
33 #include "EbPictureDecisionResults.h"
34
35 //#include "EbMotionEstimationProcess.h"
36 #undef _MM_HINT_T2
37 #define _MM_HINT_T2 1
38
39 #define OUTPUT_FPF 0
40
41 #define INTRA_MODE_PENALTY 1024
42 #define NEW_MV_MODE_PENALTY 32
43 #define DARK_THRESH 64
44
45 #define NCOUNT_INTRA_THRESH 8192
46 #define NCOUNT_INTRA_FACTOR 3
47
// Number of stats entries allocated the first time the array is created.
#define STATS_CAPABILITY_INIT 100
// Grow to 1.5 times the requested entry count. The argument is parenthesised so that a
// compound expression (e.g. a + b) is scaled as a whole.
#define STATS_CAPABILITY_GROW(s) ((s) * 3 / 2)
realloc_stats_out(SequenceControlSet * scs_ptr,FirstPassStatsOut * out,uint64_t frame_number)51 static EbErrorType realloc_stats_out(SequenceControlSet *scs_ptr, FirstPassStatsOut *out,
52 uint64_t frame_number) {
53 if (frame_number < out->size)
54 return EB_ErrorNone;
55
56 if ((int64_t)frame_number >= (int64_t)out->capability - 1) {
57 size_t capability = (int64_t)frame_number >= (int64_t)STATS_CAPABILITY_INIT - 1
58 ? STATS_CAPABILITY_GROW(frame_number)
59 : STATS_CAPABILITY_INIT;
60 if (scs_ptr->lap_enabled) {
61 //store the data points before re-allocation
62 uint64_t stats_in_start_offset = 0;
63 uint64_t stats_in_offset = 0;
64 uint64_t stats_in_end_offset = 0;
65 if (frame_number) {
66 stats_in_start_offset = scs_ptr->twopass.stats_buf_ctx->stats_in_start - out->stat;
67 stats_in_offset = scs_ptr->twopass.stats_in - out->stat;
68 stats_in_end_offset = scs_ptr->twopass.stats_buf_ctx->stats_in_end_write - out->stat;
69 }
70 EB_REALLOC_ARRAY(out->stat, capability);
71 // restore the pointers after re-allocation is done
72 scs_ptr->twopass.stats_buf_ctx->stats_in_start = out->stat + stats_in_start_offset;
73 scs_ptr->twopass.stats_in = out->stat + stats_in_offset;
74 scs_ptr->twopass.stats_buf_ctx->stats_in_end_write = out->stat + stats_in_end_offset;
75 } else {
76 EB_REALLOC_ARRAY(out->stat, capability);
77 }
78 out->capability = capability;
79 }
80 out->size = frame_number + 1;
81 return EB_ErrorNone;
82 }
83
output_stats(SequenceControlSet * scs_ptr,FIRSTPASS_STATS * stats,uint64_t frame_number)84 static AOM_INLINE void output_stats(SequenceControlSet *scs_ptr, FIRSTPASS_STATS *stats,
85 uint64_t frame_number) {
86 FirstPassStatsOut *stats_out = &scs_ptr->encode_context_ptr->stats_out;
87 svt_block_on_mutex(scs_ptr->encode_context_ptr->stat_file_mutex);
88 if (realloc_stats_out(scs_ptr, stats_out, frame_number) != EB_ErrorNone) {
89 SVT_ERROR("realloc_stats_out request %d entries failed failed\n", frame_number);
90 } else {
91 stats_out->stat[frame_number] = *stats;
92 }
93
94 // TEMP debug code
95 #if OUTPUT_FPF
96 {
97 FILE *fpfile;
98 if (frame_number == 0)
99 fpfile = fopen("firstpass.stt", "w");
100 else
101 fpfile = fopen("firstpass.stt", "a");
102 fprintf(fpfile,
103 "%12.0lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf %12.4lf"
104 "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf"
105 "%12.4lf %12.4lf %12.0lf %12.4lf %12.4lf %12.4lf %12.4lf\n",
106 stats->frame,
107 stats->weight,
108 stats->intra_error,
109 stats->coded_error,
110 stats->sr_coded_error,
111 stats->pcnt_inter,
112 stats->pcnt_motion,
113 stats->pcnt_second_ref,
114 stats->pcnt_neutral,
115 stats->intra_skip_pct,
116 stats->inactive_zone_rows,
117 stats->inactive_zone_cols,
118 stats->MVr,
119 stats->mvr_abs,
120 stats->MVc,
121 stats->mvc_abs,
122 stats->MVrv,
123 stats->MVcv,
124 stats->mv_in_out_count,
125 stats->new_mv_count,
126 stats->count,
127 stats->duration);
128 fclose(fpfile);
129 }
130 #endif
131 svt_release_mutex(scs_ptr->encode_context_ptr->stat_file_mutex);
132 }
/*
 * Resets a FIRSTPASS_STATS record: every field listed below is set to 0.0, except
 * `duration`, which is set to 1.0.
 */
void svt_av1_twopass_zero_stats(FIRSTPASS_STATS *section) {
    FIRSTPASS_STATS *const s = section;

    // Frame identity and weighting.
    s->frame  = 0.0;
    s->weight = 0.0;
    s->count  = 0.0;

    // Error measures.
    s->intra_error     = 0.0;
    s->coded_error     = 0.0;
    s->sr_coded_error  = 0.0;
    s->tr_coded_error  = 0.0;
    s->raw_error_stdev = 0.0;

    // Block-type percentages.
    s->pcnt_inter      = 0.0;
    s->pcnt_motion     = 0.0;
    s->pcnt_second_ref = 0.0;
    s->pcnt_third_ref  = 0.0;
    s->pcnt_neutral    = 0.0;
    s->intra_skip_pct  = 0.0;

    // Inactive zones.
    s->inactive_zone_rows = 0.0;
    s->inactive_zone_cols = 0.0;

    // Motion vector statistics.
    s->MVr             = 0.0;
    s->mvr_abs         = 0.0;
    s->MVc             = 0.0;
    s->mvc_abs         = 0.0;
    s->MVrv            = 0.0;
    s->MVcv            = 0.0;
    s->mv_in_out_count = 0.0;
    s->new_mv_count    = 0.0;

    // The only non-zero default.
    s->duration = 1.0;
}
/*
 * Adds the stats of `frame` field by field into the running totals in `section`.
 * raw_error_stdev, pcnt_third_ref and tr_coded_error are not touched by this function.
 */
void svt_av1_accumulate_stats(FIRSTPASS_STATS *section, const FIRSTPASS_STATS *frame) {
    FIRSTPASS_STATS *const       acc = section;
    const FIRSTPASS_STATS *const cur = frame;

    // Frame identity, weighting and timing.
    acc->frame += cur->frame;
    acc->weight += cur->weight;
    acc->count += cur->count;
    acc->duration += cur->duration;

    // Error measures.
    acc->intra_error += cur->intra_error;
    acc->coded_error += cur->coded_error;
    acc->sr_coded_error += cur->sr_coded_error;

    // Block-type percentages.
    acc->pcnt_inter += cur->pcnt_inter;
    acc->pcnt_motion += cur->pcnt_motion;
    acc->pcnt_second_ref += cur->pcnt_second_ref;
    acc->pcnt_neutral += cur->pcnt_neutral;
    acc->intra_skip_pct += cur->intra_skip_pct;

    // Inactive zones.
    acc->inactive_zone_rows += cur->inactive_zone_rows;
    acc->inactive_zone_cols += cur->inactive_zone_cols;

    // Motion vector statistics.
    acc->MVr += cur->MVr;
    acc->mvr_abs += cur->mvr_abs;
    acc->MVc += cur->MVc;
    acc->mvc_abs += cur->mvc_abs;
    acc->MVrv += cur->MVrv;
    acc->MVcv += cur->MVcv;
    acc->mv_in_out_count += cur->mv_in_out_count;
    acc->new_mv_count += cur->new_mv_count;
}
/*
 * Finishes the first pass: when an accumulated total_stats record exists, it is written
 * to the stats output at index picture_number + 1, i.e. right after this picture's entry.
 */
void svt_av1_end_first_pass(PictureParentControlSet *pcs_ptr) {
    SequenceControlSet *scs_ptr = pcs_ptr->scs_ptr;
    FIRSTPASS_STATS *   totals  = scs_ptr->twopass.stats_buf_ctx->total_stats;

    if (!totals)
        return;

    // add the total to the end of the file
    output_stats(scs_ptr, totals, pcs_ptr->picture_number + 1);
}
raw_motion_error_stdev(int * raw_motion_err_list,int raw_motion_err_counts)193 static double raw_motion_error_stdev(int *raw_motion_err_list, int raw_motion_err_counts) {
194 int64_t sum_raw_err = 0;
195 double raw_err_avg = 0;
196 double raw_err_stdev = 0;
197 if (raw_motion_err_counts == 0)
198 return 0;
199
200 int i;
201 for (i = 0; i < raw_motion_err_counts; i++) { sum_raw_err += raw_motion_err_list[i]; }
202 raw_err_avg = (double)sum_raw_err / raw_motion_err_counts;
203 for (i = 0; i < raw_motion_err_counts; i++) {
204 raw_err_stdev += (raw_motion_err_list[i] - raw_err_avg) *
205 (raw_motion_err_list[i] - raw_err_avg);
206 }
207 // Calculate the standard deviation for the motion error of all the inter
208 // blocks of the 0,0 motion using the last source
209 // frame as the reference.
210 raw_err_stdev = sqrt(raw_err_stdev / raw_motion_err_counts);
211 return raw_err_stdev;
212 }
213 #define UL_INTRA_THRESH 50
214 #define INVALID_ROW -1
215 // Accumulates motion vector stats.
216 // Modifies member variables of "stats".
accumulate_mv_stats(const MV best_mv,const FULLPEL_MV mv,const int mb_row,const int mb_col,const int mb_rows,const int mb_cols,MV * last_mv,FRAME_STATS * stats)217 void accumulate_mv_stats(const MV best_mv, const FULLPEL_MV mv, const int mb_row, const int mb_col,
218 const int mb_rows, const int mb_cols, MV *last_mv, FRAME_STATS *stats) {
219 if (is_zero_mv(&best_mv))
220 return;
221
222 ++stats->mv_count;
223 // Non-zero vector, was it different from the last non zero vector?
224 if (!is_equal_mv(&best_mv, last_mv))
225 ++stats->new_mv_count;
226 *last_mv = best_mv;
227
228 // Does the row vector point inwards or outwards?
229 if (mb_row < mb_rows / 2) {
230 if (mv.row > 0) {
231 --stats->sum_in_vectors;
232 } else if (mv.row < 0) {
233 ++stats->sum_in_vectors;
234 }
235 } else if (mb_row > mb_rows / 2) {
236 if (mv.row > 0) {
237 ++stats->sum_in_vectors;
238 } else if (mv.row < 0) {
239 --stats->sum_in_vectors;
240 }
241 }
242
243 // Does the col vector point inwards or outwards?
244 if (mb_col < mb_cols / 2) {
245 if (mv.col > 0) {
246 --stats->sum_in_vectors;
247 } else if (mv.col < 0) {
248 ++stats->sum_in_vectors;
249 }
250 } else if (mb_col > mb_cols / 2) {
251 if (mv.col > 0) {
252 ++stats->sum_in_vectors;
253 } else if (mv.col < 0) {
254 --stats->sum_in_vectors;
255 }
256 }
257 }
258 // Updates the first pass stats of this frame.
259 // Input:
260 // cpi: the encoder setting. Only a few params in it will be used.
261 // stats: stats accumulated for this frame.
262 // raw_err_stdev: the statndard deviation for the motion error of all the
263 // inter blocks of the (0,0) motion using the last source
264 // frame as the reference.
265 // frame_number: current frame number.
266 // ts_duration: Duration of the frame / collection of frames.
267 // Updates:
268 // twopass->total_stats: the accumulated stats.
269 // twopass->stats_buf_ctx->stats_in_end: the pointer to the current stats,
270 // update its value and its position
271 // in the buffer.
update_firstpass_stats(PictureParentControlSet * pcs_ptr,const FRAME_STATS * const stats,const double raw_err_stdev,const int frame_number,const int64_t ts_duration)272 static void update_firstpass_stats(PictureParentControlSet *pcs_ptr, const FRAME_STATS *const stats,
273 const double raw_err_stdev, const int frame_number,
274 const int64_t ts_duration) {
275 SequenceControlSet *scs_ptr = pcs_ptr->scs_ptr;
276 TWO_PASS * twopass = &scs_ptr->twopass;
277
278 const uint32_t mb_cols = (scs_ptr->seq_header.max_frame_width + 16 - 1) / 16;
279 const uint32_t mb_rows = (scs_ptr->seq_header.max_frame_height + 16 - 1) / 16;
280 FIRSTPASS_STATS *this_frame_stats = twopass->stats_buf_ctx->stats_in_end_write;
281 FIRSTPASS_STATS fps;
282 // The minimum error here insures some bit allocation to frames even
283 // in static regions. The allocation per MB declines for larger formats
284 // where the typical "real" energy per MB also falls.
285 // Initial estimate here uses sqrt(mbs) to define the min_err, where the
286 // number of mbs is proportional to the image area.
287 const int num_mbs = mb_rows * mb_cols;
288 //(cpi->oxcf.resize_cfg.resize_mode != RESIZE_NONE)
289 // ? cpi->initial_mbs
290 // : mi_params->MBs;
291 const double min_err = 200 * sqrt(num_mbs);
292
293 if (pcs_ptr->skip_frame) {
294 FirstPassStatsOut * stats_out = &scs_ptr->encode_context_ptr->stats_out;
295 fps = stats_out->stat[frame_number - 1];
296 fps.frame = frame_number;
297 }else{
298 fps.weight = stats->intra_factor * stats->brightness_factor;
299 fps.frame = frame_number;
300 fps.coded_error = (double)(stats->coded_error >> 8) + min_err;
301 fps.sr_coded_error = (double)(stats->sr_coded_error >> 8) + min_err;
302 fps.tr_coded_error = (double)(stats->tr_coded_error >> 8) + min_err;
303 fps.intra_error = (double)(stats->intra_error >> 8) + min_err;
304 fps.count = 1.0;
305 fps.pcnt_inter = (double)stats->inter_count / num_mbs;
306 fps.pcnt_second_ref = (double)stats->second_ref_count / num_mbs;
307 fps.pcnt_third_ref = (double)stats->third_ref_count / num_mbs;
308 fps.pcnt_neutral = (double)stats->neutral_count / num_mbs;
309 fps.intra_skip_pct = (double)stats->intra_skip_count / num_mbs;
310 fps.inactive_zone_rows = (double)stats->image_data_start_row;
311 fps.inactive_zone_cols = (double)0; // TODO(paulwilkins): fix
312 fps.raw_error_stdev = raw_err_stdev;
313
314 if (stats->mv_count > 0) {
315 fps.MVr = (double)stats->sum_mvr / stats->mv_count;
316 fps.mvr_abs = (double)stats->sum_mvr_abs / stats->mv_count;
317 fps.MVc = (double)stats->sum_mvc / stats->mv_count;
318 fps.mvc_abs = (double)stats->sum_mvc_abs / stats->mv_count;
319 fps.MVrv = ((double)stats->sum_mvrs -
320 ((double)stats->sum_mvr * stats->sum_mvr / stats->mv_count)) /
321 stats->mv_count;
322 fps.MVcv = ((double)stats->sum_mvcs -
323 ((double)stats->sum_mvc * stats->sum_mvc / stats->mv_count)) /
324 stats->mv_count;
325 fps.mv_in_out_count = (double)stats->sum_in_vectors / (stats->mv_count * 2);
326 fps.new_mv_count = stats->new_mv_count;
327 fps.pcnt_motion = (double)stats->mv_count / num_mbs;
328 } else {
329 fps.MVr = 0.0;
330 fps.mvr_abs = 0.0;
331 fps.MVc = 0.0;
332 fps.mvc_abs = 0.0;
333 fps.MVrv = 0.0;
334 fps.MVcv = 0.0;
335 fps.mv_in_out_count = 0.0;
336 fps.new_mv_count = 0.0;
337 fps.pcnt_motion = 0.0;
338 }
339
340 // TODO(paulwilkins): Handle the case when duration is set to 0, or
341 // something less than the full time between subsequent values of
342 // cpi->source_time_stamp.
343 fps.duration = (double)ts_duration;
344 }
345 // We will store the stats inside the persistent twopass struct (and NOT the
346 // local variable 'fps'), and then cpi->output_pkt_list will point to it.
347 *this_frame_stats = fps;
348 output_stats(scs_ptr, this_frame_stats, pcs_ptr->picture_number);
349 if (twopass->stats_buf_ctx->total_stats != NULL) {
350 svt_av1_accumulate_stats(twopass->stats_buf_ctx->total_stats, &fps);
351 }
352
353 /*In the case of two pass, first pass uses it as a circular buffer,
354 * when LAP is enabled it is used as a linear buffer*/
355 twopass->stats_buf_ctx->stats_in_end_write++;
356
357 if ((use_output_stat(scs_ptr)) &&
358 (twopass->stats_buf_ctx->stats_in_end_write >= twopass->stats_buf_ctx->stats_in_buf_end)) {
359 twopass->stats_buf_ctx->stats_in_end_write = twopass->stats_buf_ctx->stats_in_start;
360 }
361 }
362
/*
 * Sums the per-macroblock stats of an mb_rows x mb_cols grid (stored row by row in
 * mb_stats) into one frame-level FRAME_STATS, which is returned by value.
 * image_data_start_row is not summed: it takes the first value, in scan order, that is
 * not INVALID_ROW, and stays INVALID_ROW when no block provides one.
 */
static FRAME_STATS accumulate_frame_stats(FRAME_STATS *mb_stats, int mb_rows, int mb_cols) {
    FRAME_STATS total = {0};
    total.image_data_start_row = INVALID_ROW;

    for (int row = 0; row < mb_rows; row++) {
        for (int col = 0; col < mb_cols; col++) {
            const FRAME_STATS *mb = &mb_stats[row * mb_cols + col];

            // Keep the first valid start row encountered.
            if (total.image_data_start_row == INVALID_ROW &&
                mb->image_data_start_row != INVALID_ROW) {
                total.image_data_start_row = mb->image_data_start_row;
            }

            // Weighting factors.
            total.brightness_factor += mb->brightness_factor;
            total.intra_factor += mb->intra_factor;

            // Prediction errors.
            total.intra_error += mb->intra_error;
            total.coded_error += mb->coded_error;
            total.sr_coded_error += mb->sr_coded_error;
            total.tr_coded_error += mb->tr_coded_error;

            // Block counters.
            total.inter_count += mb->inter_count;
            total.second_ref_count += mb->second_ref_count;
            total.third_ref_count += mb->third_ref_count;
            total.neutral_count += mb->neutral_count;
            total.intra_skip_count += mb->intra_skip_count;

            // Motion vector sums.
            total.mv_count += mb->mv_count;
            total.new_mv_count += mb->new_mv_count;
            total.sum_in_vectors += mb->sum_in_vectors;
            total.sum_mvr += mb->sum_mvr;
            total.sum_mvr_abs += mb->sum_mvr_abs;
            total.sum_mvrs += mb->sum_mvrs;
            total.sum_mvc += mb->sum_mvc;
            total.sum_mvc_abs += mb->sum_mvc_abs;
            total.sum_mvcs += mb->sum_mvcs;
        }
    }
    return total;
}
399 /**************************************************
400 * Reset first pass stat
401 **************************************************/
setup_firstpass_data_seg(PictureParentControlSet * ppcs_ptr,int32_t segment_index)402 void setup_firstpass_data_seg(PictureParentControlSet *ppcs_ptr, int32_t segment_index) {
403 SequenceControlSet *scs_ptr = ppcs_ptr->scs_ptr;
404 FirstPassData * firstpass_data = &ppcs_ptr->firstpass_data;
405 const uint32_t mb_cols = (scs_ptr->seq_header.max_frame_width + 16 - 1) / 16;
406 const uint32_t mb_rows = (scs_ptr->seq_header.max_frame_height + 16 - 1) / 16;
407 EbPictureBufferDesc *input_picture_ptr = ppcs_ptr->enhanced_picture_ptr;
408
409 uint32_t blk_cols = (uint32_t)(input_picture_ptr->width + BLOCK_SIZE_64 - 1) / BLOCK_SIZE_64;
410 uint32_t blk_rows = (uint32_t)(input_picture_ptr->height + BLOCK_SIZE_64 - 1) / BLOCK_SIZE_64;
411
412 uint32_t x_seg_idx;
413 uint32_t y_seg_idx;
414 uint32_t picture_width_in_b64 = blk_cols;
415 uint32_t picture_height_in_b64 = blk_rows;
416 SEGMENT_CONVERT_IDX_TO_XY(
417 segment_index, x_seg_idx, y_seg_idx, ppcs_ptr->first_pass_seg_column_count);
418 uint32_t x_b64_start_idx = SEGMENT_START_IDX(
419 x_seg_idx, picture_width_in_b64, ppcs_ptr->first_pass_seg_column_count);
420 uint32_t x_b64_end_idx = SEGMENT_END_IDX(
421 x_seg_idx, picture_width_in_b64, ppcs_ptr->first_pass_seg_column_count);
422 uint32_t y_b64_start_idx = SEGMENT_START_IDX(
423 y_seg_idx, picture_height_in_b64, ppcs_ptr->first_pass_seg_row_count);
424 uint32_t y_b64_end_idx = SEGMENT_END_IDX(
425 y_seg_idx, picture_height_in_b64, ppcs_ptr->first_pass_seg_row_count);
426
427 const uint32_t mb_y_end = (y_b64_end_idx << 2) > mb_rows ? mb_rows : (y_b64_end_idx << 2);
428 const uint32_t mb_x_end = (x_b64_end_idx << 2) > mb_cols ? mb_cols : (x_b64_end_idx << 2);
429
430 for (uint32_t mb_y = (y_b64_start_idx << 2); mb_y < mb_y_end; mb_y++) {
431 for (uint32_t mb_x = (x_b64_start_idx << 2); mb_x < mb_x_end; mb_x++) {
432 memset(firstpass_data->mb_stats + mb_x + mb_y * mb_cols, 0, sizeof(*firstpass_data->mb_stats));
433 firstpass_data->mb_stats[mb_x + mb_y * mb_cols].image_data_start_row = INVALID_ROW;
434 }
435 }
436 }
437
/*
 * Closes the first pass for one picture. Unless pcs_ptr->skip_frame is set, the
 * per-macroblock stats are summed, the raw motion error standard deviation is computed and
 * the frame-level values are normalised. The result is then handed to
 * update_firstpass_stats() together with the picture number and ts_duration.
 */
void first_pass_frame_end(PictureParentControlSet *pcs_ptr, const int64_t ts_duration) {
    SequenceControlSet *scs_ptr = pcs_ptr->scs_ptr;
    const uint32_t      mb_cols = (scs_ptr->seq_header.max_frame_width + 16 - 1) / 16;
    const uint32_t      mb_rows = (scs_ptr->seq_header.max_frame_height + 16 - 1) / 16;

    // Left unset for skipped frames; update_firstpass_stats() does not read it in that case.
    FRAME_STATS stats;
    double      raw_err_stdev = 0;

    if (!pcs_ptr->skip_frame) {
        FirstPassData *fp_data = &pcs_ptr->firstpass_data;
        const int      num_mbs = mb_rows * mb_cols;

        stats = accumulate_frame_stats(fp_data->mb_stats, mb_rows, mb_cols);

        // Intra-only frames contribute no raw motion error samples.
        int total_raw_motion_err_count = frame_is_intra_only(pcs_ptr) ? 0 : mb_rows * mb_cols;
        raw_err_stdev = raw_motion_error_stdev(fp_data->raw_motion_err_list,
                                               total_raw_motion_err_count);

        // Clamp the image start to rows/2. This number of rows is discarded top
        // and bottom as dead data so rows / 2 means the frame is blank.
        const int half_rows = (int)mb_rows / 2;
        if (stats.image_data_start_row == INVALID_ROW ||
            stats.image_data_start_row > half_rows) {
            stats.image_data_start_row = half_rows;
        }

        // Exclude any image dead zone
        if (stats.image_data_start_row > 0) {
            stats.intra_skip_count = AOMMAX(
                0, stats.intra_skip_count - (stats.image_data_start_row * (int)mb_cols * 2));
        }

        // Turn the summed factors into per-macroblock averages.
        stats.intra_factor      = stats.intra_factor / (double)num_mbs;
        stats.brightness_factor = stats.brightness_factor / (double)num_mbs;
    }

    update_firstpass_stats(
        pcs_ptr, &stats, raw_err_stdev, (const int)pcs_ptr->picture_number, ts_duration);
}
474 /******************************************************
475 * Derive Pre-Analysis settings for first pass for pcs
476 Input : encoder mode and tune
477 Output : Pre-Analysis signal(s)
478 ******************************************************/
first_pass_signal_derivation_pre_analysis_pcs(PictureParentControlSet * pcs_ptr)479 extern EbErrorType first_pass_signal_derivation_pre_analysis_pcs(PictureParentControlSet *pcs_ptr) {
480 EbErrorType return_error = EB_ErrorNone;
481 // Derive HME Flag
482 pcs_ptr->enable_hme_flag = 1;
483 pcs_ptr->enable_hme_level0_flag = 1;
484 pcs_ptr->enable_hme_level1_flag = 1;
485 pcs_ptr->enable_hme_level2_flag = 1;
486
487 //// Set here to allocate resources for the downsampled pictures used in HME (generated in PictureAnalysis)
488 //// Will be later updated for SC/NSC in PictureDecisionProcess
489 pcs_ptr->tf_enable_hme_flag = 0;
490 pcs_ptr->tf_enable_hme_level0_flag = 0;
491 pcs_ptr->tf_enable_hme_level1_flag = 0;
492 pcs_ptr->tf_enable_hme_level2_flag = 0;
493
494 return return_error;
495 }
496
497 /******************************************************
498 * Derive Pre-Analysis settings for first pass for scs
499 Input : encoder mode and tune
500 Output : Pre-Analysis signal(s)
501 ******************************************************/
first_pass_signal_derivation_pre_analysis_scs(SequenceControlSet * scs_ptr)502 extern EbErrorType first_pass_signal_derivation_pre_analysis_scs(SequenceControlSet * scs_ptr) {
503 EbErrorType return_error = EB_ErrorNone;
504 scs_ptr->seq_header.enable_intra_edge_filter = 0;
505 scs_ptr->seq_header.pic_based_rate_est = 0;
506 scs_ptr->seq_header.enable_restoration = 0;
507 scs_ptr->seq_header.cdef_level /*enable_cdef*/ = 0;
508 scs_ptr->seq_header.enable_warped_motion = 0;
509
510 return return_error;
511 }
512
513 #define LOW_MOTION_ERROR_THRESH 25
514 #define MOTION_ERROR_THRESH 500
515 void set_tf_controls(PictureParentControlSet *pcs_ptr, uint8_t tf_level);
516 /******************************************************
517 * Derive Multi-Processes Settings for first pass
518 Input : encoder mode and tune
519 Output : Multi-Processes signal(s)
520 ******************************************************/
521
first_pass_signal_derivation_multi_processes(SequenceControlSet * scs_ptr,PictureParentControlSet * pcs_ptr)522 EbErrorType first_pass_signal_derivation_multi_processes(SequenceControlSet * scs_ptr,
523 PictureParentControlSet *pcs_ptr) {
524 EbErrorType return_error = EB_ErrorNone;
525 FrameHeader *frm_hdr = &pcs_ptr->frm_hdr;
526 // If enabled here, the hme enable flags should also be enabled in ResourceCoordinationProcess
527 // to ensure that resources are allocated for the downsampled pictures used in HME
528 pcs_ptr->enable_hme_flag = 1;
529 pcs_ptr->enable_hme_level0_flag = 1;
530 pcs_ptr->enable_hme_level1_flag = 1;
531 pcs_ptr->enable_hme_level2_flag = 1;
532
533 pcs_ptr->tf_enable_hme_flag = 0;
534 pcs_ptr->tf_enable_hme_level0_flag = 0;
535 pcs_ptr->tf_enable_hme_level1_flag = 0;
536 pcs_ptr->tf_enable_hme_level2_flag = 0;
537
538 // Set the Multi-Pass PD level
539 pcs_ptr->multi_pass_pd_level = MULTI_PASS_PD_OFF;
540
541 // Set disallow_nsq
542 pcs_ptr->disallow_nsq = EB_TRUE;
543
544 pcs_ptr->max_number_of_pus_per_sb = SQUARE_PU_COUNT;
545 pcs_ptr->disallow_all_nsq_blocks_below_8x8 = EB_TRUE;
546
547 // Set disallow_all_nsq_blocks_below_16x16: 16x8, 8x16, 16x4, 4x16
548 pcs_ptr->disallow_all_nsq_blocks_below_16x16 = EB_TRUE;
549
550 pcs_ptr->disallow_all_nsq_blocks_below_64x64 = EB_TRUE;
551 pcs_ptr->disallow_all_nsq_blocks_below_32x32 = EB_TRUE;
552 pcs_ptr->disallow_all_nsq_blocks_above_64x64 = EB_TRUE;
553 pcs_ptr->disallow_all_nsq_blocks_above_32x32 = EB_TRUE;
554 // disallow_all_nsq_blocks_above_16x16
555 pcs_ptr->disallow_all_nsq_blocks_above_16x16 = EB_TRUE;
556
557 pcs_ptr->disallow_HVA_HVB_HV4 = EB_TRUE;
558 pcs_ptr->disallow_HV4 = EB_TRUE;
559
560 // Set disallow_all_non_hv_nsq_blocks_below_16x16
561 pcs_ptr->disallow_all_non_hv_nsq_blocks_below_16x16 = EB_TRUE;
562
563 // Set disallow_all_h4_v4_blocks_below_16x16
564 pcs_ptr->disallow_all_h4_v4_blocks_below_16x16 = EB_TRUE;
565
566 frm_hdr->allow_screen_content_tools = 0;
567 frm_hdr->allow_intrabc = 0;
568
569 // Palette Modes:
570 // 0:OFF
571 // 1:Slow NIC=7/4/4
572 // 2: NIC=7/2/2
573 // 3: NIC=7/2/2 + No K means for non ref
574 // 4: NIC=4/2/1
575 // 5: NIC=4/2/1 + No K means for Inter frame
576 // 6: Fastest NIC=4/2/1 + No K means for non base + step for non base for most dominent
577 pcs_ptr->palette_level = 0;
578 // Loop filter Level Settings
579 // 0 OFF
580 // 1 CU-BASED
581 // 2 LIGHT FRAME-BASED
582 // 3 FULL FRAME-BASED
583 pcs_ptr->loop_filter_mode = 0;
584
585 // CDEF Level Settings
586 // 0 OFF
587 // 1 1 step refinement
588 // 2 4 step refinement
589 // 3 8 step refinement
590 // 4 16 step refinement
591 // 5 64 step refinement
592 pcs_ptr->cdef_level = 0;
593
594 // SG Level Settings
595 // 0 OFF
596 // 1 0 step refinement
597 // 2 1 step refinement
598 // 3 4 step refinement
599 // 4 16 step refinement
600 Av1Common *cm = pcs_ptr->av1_cm;
601 cm->sg_filter_mode = 0;
602
603 // WN Level Settings
604 // 0 OFF
605 // 1 3-Tap luma/ 3-Tap chroma
606 // 2 5-Tap luma/ 5-Tap chroma
607 // 3 7-Tap luma/ 5-Tap chroma
608 cm->wn_filter_mode = 0;
609
610 // Intra prediction modes Settings
611 // 0 FULL
612 // 1 LIGHT per block : disable_z2_prediction && disable_angle_refinement for 64/32/4
613 // 2 OFF per block : disable_angle_prediction for 64/32/4
614 // 3 OFF : disable_angle_prediction
615 // 4 OIS based Intra
616 // 5 Light OIS based Intra
617 pcs_ptr->intra_pred_mode = 3;
618
619 // Set Tx Search Settings
620 // 0 OFF
621 // 1 ON
622 pcs_ptr->tx_size_search_mode = 1;
623
624 // Set frame end cdf update mode Settings
625 // 0 OFF
626 // 1 ON
627 if (scs_ptr->static_config.frame_end_cdf_update == DEFAULT)
628 pcs_ptr->frame_end_cdf_update_mode = 0;
629 else
630 pcs_ptr->frame_end_cdf_update_mode = scs_ptr->static_config.frame_end_cdf_update;
631
632 pcs_ptr->frm_hdr.use_ref_frame_mvs = 0;
633
634 // Global motion level Settings
635 // GM_FULL Exhaustive search mode.
636 // GM_DOWN Downsampled resolution with a
637 // downsampling factor of 2 in each dimension GM_TRAN_ONLY Translation only
638 // using ME MV.
639 pcs_ptr->gm_level = GM_DOWN;
640
641 // Exit TX size search when all coefficients are zero
642 // 0: OFF
643 // 1: ON
644 pcs_ptr->tx_size_early_exit = 0;
645
646 return return_error;
647 }
648 /******************************************************
649 * Derive Mode Decision Config Settings for first pass
650 Input : encoder mode and tune
651 Output : EncDec Kernel signal(s)
652 ******************************************************/
first_pass_signal_derivation_mode_decision_config_kernel(PictureControlSet * pcs_ptr)653 EbErrorType first_pass_signal_derivation_mode_decision_config_kernel(PictureControlSet *pcs_ptr) {
654 EbErrorType return_error = EB_ErrorNone;
655 // CDF
656 pcs_ptr->cdf_ctrl.enabled = pcs_ptr->cdf_ctrl.update_coef = 0;
657 pcs_ptr->cdf_ctrl.update_mv = pcs_ptr->cdf_ctrl.update_se = 0;
658
659 // Filter INTRA
660 // pic_filter_intra_level specifies whether filter intra would be active
661 // for a given picture.
662 // pic_filter_intra_level | Settings
663 // 0 | OFF
664 // 1 | ON
665 pcs_ptr->pic_filter_intra_level = 0;
666
667 // High Precision
668 FrameHeader *frm_hdr = &pcs_ptr->parent_pcs_ptr->frm_hdr;
669 frm_hdr->allow_high_precision_mv = 0;
670
671 // Warped
672 frm_hdr->allow_warped_motion = 0;
673 frm_hdr->is_motion_mode_switchable = frm_hdr->allow_warped_motion;
674
675 // pic_obmc_level - pic_obmc_level is used to define md_pic_obmc_level.
676 // The latter determines the OBMC settings in the function set_obmc_controls.
677 // Please check the definitions of the flags/variables in the function
678 // set_obmc_controls corresponding to the pic_obmc_level settings.
679 // pic_obmc_level | Default Encoder Settings | Command Line Settings
680 // 0 | OFF subject to possible constraints | OFF everywhere in encoder
681 // 1 | ON subject to possible constraints | Fully ON in PD_PASS_2
682 // 2 | Faster level subject to possible constraints | Level 2 everywhere in PD_PASS_2
683 // 3 | Even faster level subject to possible constraints | Level 3 everywhere in PD_PASS_3
684 pcs_ptr->parent_pcs_ptr->pic_obmc_level = 0;
685
686 // Switchable Motion Mode
687 frm_hdr->is_motion_mode_switchable = frm_hdr->is_motion_mode_switchable ||
688 pcs_ptr->parent_pcs_ptr->pic_obmc_level;
689
690 // HBD Mode
691 pcs_ptr->hbd_mode_decision = EB_8_BIT_MD; //first pass hard coded to 8bit
692 pcs_ptr->parent_pcs_ptr->bypass_cost_table_gen = 0;
693 return return_error;
694 }
695 void *set_me_hme_params_oq(MeContext *me_context_ptr, PictureParentControlSet *pcs_ptr,
696 SequenceControlSet *scs_ptr, EbInputResolution input_resolution);
697 void *set_me_hme_params_from_config(SequenceControlSet *scs_ptr, MeContext *me_context_ptr);
698 void set_me_hme_ref_prune_ctrls(MeContext *context_ptr, uint8_t prune_level);
699 void set_me_sr_adjustment_ctrls(MeContext *context_ptr, uint8_t sr_adjustment_level);
700 void set_gm_controls(PictureParentControlSet *pcs_ptr, uint8_t gm_level);
701 void set_prehme_ctrls(MeContext* context, uint8_t level);
702 /******************************************************
703 * Derive ME Settings for first pass
704 Input : encoder mode and tune
705 Output : ME Kernel signal(s)
706 ******************************************************/
first_pass_signal_derivation_me_kernel(SequenceControlSet * scs_ptr,PictureParentControlSet * pcs_ptr,MotionEstimationContext_t * context_ptr)707 EbErrorType first_pass_signal_derivation_me_kernel(SequenceControlSet * scs_ptr,
708 PictureParentControlSet * pcs_ptr,
709 MotionEstimationContext_t *context_ptr) {
710 EbErrorType return_error = EB_ErrorNone;
711
712 // Set ME/HME search regions
713
714 if (scs_ptr->static_config.use_default_me_hme)
715 set_me_hme_params_oq(
716 context_ptr->me_context_ptr, pcs_ptr, scs_ptr, scs_ptr->input_resolution);
717 else
718 set_me_hme_params_from_config(scs_ptr, context_ptr->me_context_ptr);
719
720 // Set HME flags
721 context_ptr->me_context_ptr->enable_hme_flag = pcs_ptr->enable_hme_flag;
722 context_ptr->me_context_ptr->enable_hme_level0_flag = pcs_ptr->enable_hme_level0_flag;
723 context_ptr->me_context_ptr->enable_hme_level1_flag = pcs_ptr->enable_hme_level1_flag;
724 context_ptr->me_context_ptr->enable_hme_level2_flag = scs_ptr->enc_mode_2ndpass <= ENC_M7 ? pcs_ptr->enable_hme_level2_flag : 0;
725
726 // HME Search Method
727 context_ptr->me_context_ptr->hme_search_method = SUB_SAD_SEARCH;
728
729 // ME Search Method
730 context_ptr->me_context_ptr->me_search_method = SUB_SAD_SEARCH;
731 uint8_t gm_level = 0;
732 set_gm_controls(pcs_ptr, gm_level);
733
734 // Set hme/me based reference pruning level (0-4)
735 set_me_hme_ref_prune_ctrls(context_ptr->me_context_ptr, 0);
736
737 // Set hme-based me sr adjustment level
738 set_me_sr_adjustment_ctrls(context_ptr->me_context_ptr, 0);
739 context_ptr->me_context_ptr->prune_me_candidates_th = 0; // No impact on tf
740 set_prehme_ctrls(context_ptr->me_context_ptr, 0);
741 return return_error;
742 };
743
744 /***************************************************************************
745 * Computes and returns the intra pred error of a block using src.
746 * intra pred error: sum of squared error of the intra predicted residual.
747 * Modifies:
748 * stats->intra_skip_count
749 * stats->image_data_start_row
750 * stats->intra_factor
751 * stats->brightness_factor
752 * stats->intra_error
753 * stats->frame_avg_wavelet_energy
754 * Returns:
755 * this_intra_error.
756 ***************************************************************************/
open_loop_firstpass_intra_prediction(PictureParentControlSet * ppcs_ptr,int raw_motion_err,uint32_t blk_origin_x,uint32_t blk_origin_y,uint8_t bwidth,uint8_t bheight,EbPictureBufferDesc * input_picture_ptr,uint32_t input_origin_index,FRAME_STATS * const stats)757 static int open_loop_firstpass_intra_prediction(PictureParentControlSet *ppcs_ptr,int raw_motion_err , uint32_t blk_origin_x, uint32_t blk_origin_y,
758 uint8_t bwidth, uint8_t bheight,
759 EbPictureBufferDesc *input_picture_ptr,
760 uint32_t input_origin_index,
761 FRAME_STATS *const stats) {
762 int32_t mb_row = blk_origin_y >> 4;
763 int32_t mb_col = blk_origin_x >> 4;
764 const int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
765 uint8_t use8blk = 0;
766 if (ppcs_ptr->scs_ptr->enc_mode_2ndpass <= ENC_M7) {
767 use8blk = 0;
768 }
769 else {
770 if (ppcs_ptr->first_pass_ref_count)
771 if (raw_motion_err > MOTION_ERROR_THRESH)
772 use8blk = 0;
773 else
774 use8blk = 1;
775 else
776 use8blk = 0;
777 }
778
779 uint32_t sub_blk_origin_x, sub_blk_origin_y;
780 uint8_t *above_row;
781 uint8_t *left_col;
782
783 DECLARE_ALIGNED(16, uint8_t, left_data[MAX_TX_SIZE * 2 + 32]);
784 DECLARE_ALIGNED(16, uint8_t, above_data[MAX_TX_SIZE * 2 + 32]);
785 DECLARE_ALIGNED(32, uint8_t, predictor8[256 * 2]);
786 uint8_t *predictor = predictor8;
787 uint8_t sub_blk_rows = use_dc_pred
788 ? (bheight == FORCED_BLK_SIZE && bwidth == FORCED_BLK_SIZE) ? 1 : bheight / 8
789 : use8blk ? bheight / 8 : bheight / 4 ;
790 uint8_t sub_blk_cols = use_dc_pred
791 ? (bheight == FORCED_BLK_SIZE && bwidth == FORCED_BLK_SIZE) ? 1 : bwidth / 8
792 : use8blk ? bwidth / 8 : bwidth / 4 ;
793 for (uint32_t sub_blk_index_y = 0; sub_blk_index_y < sub_blk_rows; ++sub_blk_index_y) {
794 for (uint32_t sub_blk_index_x = 0; sub_blk_index_x < sub_blk_cols; ++sub_blk_index_x) {
795 TxSize tx_size = use_dc_pred
796 ? ((bheight == FORCED_BLK_SIZE && bwidth == FORCED_BLK_SIZE) ? TX_16X16 : TX_8X8)
797 : use8blk ? TX_8X8 : TX_4X4 ;
798 sub_blk_origin_x = blk_origin_x + sub_blk_index_x * bwidth / sub_blk_cols;
799 sub_blk_origin_y = blk_origin_y + sub_blk_index_y * bheight / sub_blk_rows;
800 above_row = above_data + 16;
801 left_col = left_data + 16;
802
803 // Fill Neighbor Arrays
804 update_neighbor_samples_array_open_loop_mb(
805 0, // use_top_righ_bottom_left
806 0, // update_top_neighbor
807 above_row - 1,
808 left_col - 1,
809 input_picture_ptr,
810 input_picture_ptr->stride_y,
811 sub_blk_origin_x,
812 sub_blk_origin_y,
813 bwidth / sub_blk_cols,
814 bheight / sub_blk_rows);
815 // point to top_neighbor at input buffer
816 if (sub_blk_origin_y != 0 ) {
817 (above_row) = ((input_picture_ptr->buffer_y + (((sub_blk_origin_y + input_picture_ptr->origin_y) * input_picture_ptr->stride_y) + (sub_blk_origin_x + input_picture_ptr->origin_x)) ) - input_picture_ptr->stride_y);
818 }
819 // PRED
820 predictor = &predictor8[(sub_blk_origin_x - blk_origin_x) +
821 (sub_blk_origin_y - blk_origin_y) * FORCED_BLK_SIZE];
822 intra_prediction_open_loop_mb(0,
823 DC_PRED,
824 sub_blk_origin_x,
825 sub_blk_origin_y,
826 tx_size,
827 above_row,
828 left_col,
829 predictor,
830 FORCED_BLK_SIZE);
831 }
832 }
833
834 EbSpatialFullDistType spatial_full_dist_type_fun = svt_spatial_full_distortion_kernel;
835 int this_intra_error = (uint32_t)(spatial_full_dist_type_fun(input_picture_ptr->buffer_y,
836 input_origin_index,
837 input_picture_ptr->stride_y,
838 predictor8,
839 0,
840 FORCED_BLK_SIZE,
841 bwidth,
842 bheight));
843
844 if (this_intra_error < UL_INTRA_THRESH) {
845 ++stats->intra_skip_count;
846 } else if ((mb_col > 0) && (stats->image_data_start_row == INVALID_ROW)) {
847 stats->image_data_start_row = mb_row;
848 }
849 // aom_clear_system_state();
850 double log_intra = log1p((double)this_intra_error);
851 if (log_intra < 10.0)
852 stats->intra_factor += 1.0 + ((10.0 - log_intra) * 0.05);
853 else
854 stats->intra_factor += 1.0;
855
856 int level_sample = input_picture_ptr->buffer_y[input_origin_index];
857
858 if ((level_sample < DARK_THRESH) && (log_intra < 9.0))
859 stats->brightness_factor += 1.0 + (0.01 * (DARK_THRESH - level_sample));
860 else
861 stats->brightness_factor += 1.0;
862 // Intrapenalty below deals with situations where the intra and inter
863 // error scores are very low (e.g. a plain black frame).
864 // We do not have special cases in first pass for 0,0 and nearest etc so
865 // all inter modes carry an overhead cost estimate for the mv.
866 // When the error score is very low this causes us to pick all or lots of
867 // INTRA modes and throw lots of key frames.
868 // This penalty adds a cost matching that of a 0,0 mv to the intra case.
869 this_intra_error += INTRA_MODE_PENALTY;
870
871 // Accumulate the intra error.
872 stats->intra_error += (int64_t)this_intra_error;
873 return this_intra_error;
874 }
875 /***************************************************************************
876 * Computes and returns the inter prediction error from the src last frame.
877 * Computes inter prediction errors from the golden and updates stats accordingly.
878 * Modifies:
879 * stats: many member params in it.
880 * Returns:
881 * this_inter_error
882 ***************************************************************************/
open_loop_firstpass_inter_prediction(PictureParentControlSet * ppcs_ptr,uint32_t me_sb_addr,uint32_t blk_origin_x,uint32_t blk_origin_y,uint8_t bwidth,uint8_t bheight,EbPictureBufferDesc * input_picture_ptr,uint32_t input_origin_index,const int this_intra_error,MV * last_mv,int raw_motion_err,FRAME_STATS * stats)883 static int open_loop_firstpass_inter_prediction(
884 PictureParentControlSet *ppcs_ptr, uint32_t me_sb_addr, uint32_t blk_origin_x,
885 uint32_t blk_origin_y, uint8_t bwidth, uint8_t bheight, EbPictureBufferDesc *input_picture_ptr,
886 uint32_t input_origin_index, const int this_intra_error, MV *last_mv, int raw_motion_err,
887 FRAME_STATS *stats) {
888 int32_t mb_row = blk_origin_y >> 4;
889 int32_t mb_col = blk_origin_x >> 4;
890 const uint32_t mb_cols = (ppcs_ptr->scs_ptr->seq_header.max_frame_width + FORCED_BLK_SIZE - 1) /
891 FORCED_BLK_SIZE;
892 const uint32_t mb_rows = (ppcs_ptr->scs_ptr->seq_header.max_frame_height + FORCED_BLK_SIZE -
893 1) /
894 FORCED_BLK_SIZE;
895 int this_inter_error = this_intra_error;
896 FULLPEL_MV mv = kZeroFullMv;
897 EbSpatialFullDistType spatial_full_dist_type_fun = svt_spatial_full_distortion_kernel;
898
899 int motion_error = 0;
900 // TODO(pengchong): Replace the hard-coded threshold
901 if (raw_motion_err > LOW_MOTION_ERROR_THRESH) {
902 uint32_t me_mb_offset = 0;
903 BlockGeom blk_geom;
904 const MeSbResults *me_results = ppcs_ptr->pa_me_data->me_results[me_sb_addr];
905 uint32_t me_sb_size = ppcs_ptr->scs_ptr->sb_sz;
906 blk_geom.origin_x = blk_origin_x - (blk_origin_x / me_sb_size) * me_sb_size;
907 blk_geom.origin_y = blk_origin_y - (blk_origin_y / me_sb_size) * me_sb_size;
908 blk_geom.bwidth = FORCED_BLK_SIZE;
909 blk_geom.bheight = FORCED_BLK_SIZE;
910 me_mb_offset = get_me_info_index(ppcs_ptr->max_number_of_pus_per_sb, &blk_geom, 0, 0);
911 uint8_t list_index = 0;
912 uint8_t ref_pic_index = 0;
913 mv.col =
914 me_results
915 ->me_mv_array[me_mb_offset * MAX_PA_ME_MV + (list_index ? 4 : 0) + ref_pic_index]
916 .x_mv >>
917 2;
918 mv.row =
919 me_results
920 ->me_mv_array[me_mb_offset * MAX_PA_ME_MV + (list_index ? 4 : 0) + ref_pic_index]
921 .y_mv >>
922 2;
923
924 EbPictureBufferDesc *last_input_picture_ptr = ppcs_ptr->first_pass_ref_count
925 ? ppcs_ptr->first_pass_ref_ppcs_ptr[0]->enhanced_picture_ptr
926 : NULL;
927 int32_t ref_origin_index;
928 if (last_input_picture_ptr != NULL)
929 {
930 ref_origin_index = last_input_picture_ptr->origin_x + (blk_origin_x + mv.col) +
931 (blk_origin_y + mv.row + last_input_picture_ptr->origin_y) *
932 last_input_picture_ptr->stride_y;
933
934 motion_error = (uint32_t)(spatial_full_dist_type_fun(input_picture_ptr->buffer_y,
935 input_origin_index,
936 input_picture_ptr->stride_y,
937 last_input_picture_ptr->buffer_y,
938 ref_origin_index,
939 last_input_picture_ptr->stride_y,
940 bwidth,
941 bheight));
942 }
943
944 // Assume 0,0 motion with no mv overhead.
945 if (mv.col != 0 && mv.row != 0) {
946 motion_error += NEW_MV_MODE_PENALTY;
947 }
948 // Motion search in 2nd reference frame.
949 int gf_motion_error = motion_error;
950 if (ppcs_ptr->first_pass_ref_count > 1 &&
951 me_results->total_me_candidate_index[me_mb_offset] > 1) {
952 // To convert full-pel MV
953 list_index = 0;
954 ref_pic_index = 1;
955 FULLPEL_MV gf_mv;
956 gf_mv.col = me_results
957 ->me_mv_array[me_mb_offset * MAX_PA_ME_MV + (list_index ? 4 : 0) +
958 ref_pic_index]
959 .x_mv >>
960 2;
961 gf_mv.row = me_results
962 ->me_mv_array[me_mb_offset * MAX_PA_ME_MV + (list_index ? 4 : 0) +
963 ref_pic_index]
964 .y_mv >>
965 2;
966 EbPictureBufferDesc *golden_input_picture_ptr =
967 ppcs_ptr->first_pass_ref_ppcs_ptr[1]->enhanced_picture_ptr;
968 ref_origin_index = golden_input_picture_ptr->origin_x + (blk_origin_x + gf_mv.col) +
969 (blk_origin_y + gf_mv.row + golden_input_picture_ptr->origin_y) *
970 golden_input_picture_ptr->stride_y;
971
972 gf_motion_error = (uint32_t)(
973 spatial_full_dist_type_fun(input_picture_ptr->buffer_y,
974 input_origin_index,
975 input_picture_ptr->stride_y,
976 golden_input_picture_ptr->buffer_y,
977 ref_origin_index,
978 golden_input_picture_ptr->stride_y,
979 bwidth,
980 bheight));
981
982 // Assume 0,0 motion with no mv overhead.
983 if (gf_mv.col != 0 && gf_mv.row != 0) {
984 gf_motion_error += NEW_MV_MODE_PENALTY;
985 }
986 }
987
988 if (gf_motion_error < motion_error && gf_motion_error < this_intra_error) {
989 ++stats->second_ref_count;
990 motion_error = gf_motion_error;
991 }
992 // In accumulating a score for the 2nd reference frame take the
993 // best of the motion predicted score and the intra coded error
994 // (just as will be done for) accumulation of "coded_error" for
995 // the last frame.
996 if (ppcs_ptr->first_pass_ref_count > 1 && (gf_motion_error < motion_error * 3)) {
997 stats->sr_coded_error += AOMMIN(gf_motion_error, this_intra_error);
998 } else {
999 stats->sr_coded_error += motion_error;
1000 }
1001
1002 // Motion search in 3rd reference frame.
1003 int alt_motion_error = motion_error;
1004 if (alt_motion_error < motion_error && alt_motion_error < gf_motion_error &&
1005 alt_motion_error < this_intra_error) {
1006 ++stats->third_ref_count;
1007 }
1008 // In accumulating a score for the 3rd reference frame take the
1009 // best of the motion predicted score and the intra coded error
1010 // (just as will be done for) accumulation of "coded_error" for
1011 // the last frame.
1012 // alt_ref_frame is not supported yet
1013 stats->tr_coded_error += motion_error;
1014 } else {
1015 stats->sr_coded_error += motion_error;
1016 stats->tr_coded_error += motion_error;
1017 }
1018
1019 // Start by assuming that intra mode is best.
1020 if (motion_error <= this_intra_error) {
1021 #ifdef ARCH_X86_64
1022 aom_clear_system_state();
1023 #endif
1024 // Keep a count of cases where the inter and intra were very close
1025 // and very low. This helps with scene cut detection for example in
1026 // cropped clips with black bars at the sides or top and bottom.
1027 if (((this_intra_error - INTRA_MODE_PENALTY) * 9 <= motion_error * 10) &&
1028 (this_intra_error < (2 * INTRA_MODE_PENALTY))) {
1029 stats->neutral_count += 1.0;
1030 // Also track cases where the intra is not much worse than the inter
1031 // and use this in limiting the GF/arf group length.
1032 } else if ((this_intra_error > NCOUNT_INTRA_THRESH) &&
1033 (this_intra_error < (NCOUNT_INTRA_FACTOR * motion_error))) {
1034 stats->neutral_count += (double)motion_error /
1035 DOUBLE_DIVIDE_CHECK((double)this_intra_error);
1036 }
1037 const MV best_mv = get_mv_from_fullmv(&mv);
1038 this_inter_error = motion_error;
1039 stats->sum_mvr += best_mv.row;
1040 stats->sum_mvr_abs += abs(best_mv.row);
1041 stats->sum_mvc += best_mv.col;
1042 stats->sum_mvc_abs += abs(best_mv.col);
1043 stats->sum_mvrs += best_mv.row * best_mv.row;
1044 stats->sum_mvcs += best_mv.col * best_mv.col;
1045 ++stats->inter_count;
1046 accumulate_mv_stats(best_mv, mv, mb_row, mb_col, mb_rows, mb_cols, last_mv, stats);
1047 }
1048
1049 return this_inter_error;
1050 }
1051 /***************************************************************************
1052 * Perform the processing for first pass.
1053 * For each 16x16 blocks performs DC and ME results from LAST frame and store
1054 * the required data.
1055 ***************************************************************************/
first_pass_frame_seg(PictureParentControlSet * ppcs_ptr,int32_t segment_index)1056 static EbErrorType first_pass_frame_seg(PictureParentControlSet *ppcs_ptr, int32_t segment_index) {
1057 EbPictureBufferDesc *input_picture_ptr = ppcs_ptr->enhanced_picture_ptr;
1058 EbPictureBufferDesc *last_input_picture_ptr = ppcs_ptr->first_pass_ref_count
1059 ? ppcs_ptr->first_pass_ref_ppcs_ptr[0]->enhanced_picture_ptr
1060 : NULL;
1061
1062 const uint32_t blk_cols = (uint32_t)(input_picture_ptr->width + FORCED_BLK_SIZE - 1) /
1063 FORCED_BLK_SIZE;
1064 const uint32_t blk_rows = (uint32_t)(input_picture_ptr->height + FORCED_BLK_SIZE - 1) /
1065 FORCED_BLK_SIZE;
1066
1067 uint32_t me_sb_size = ppcs_ptr->scs_ptr->sb_sz;
1068 uint32_t me_pic_width_in_sb = (ppcs_ptr->aligned_width + me_sb_size - 1) / me_sb_size;
1069 uint32_t me_sb_x, me_sb_y, me_sb_addr;
1070
1071 uint32_t blk_width, blk_height, blk_origin_x, blk_origin_y;
1072 MV first_top_mv = kZeroMv;
1073 MV last_mv;
1074 uint32_t input_origin_index;
1075
1076 uint32_t blks_in_b64 = BLOCK_SIZE_64 / FORCED_BLK_SIZE;
1077 uint32_t picture_width_in_b64 = (uint32_t)(input_picture_ptr->width + BLOCK_SIZE_64 - 1) / BLOCK_SIZE_64;
1078 uint32_t picture_height_in_b64 = (uint32_t)(input_picture_ptr->height + BLOCK_SIZE_64 - 1) / BLOCK_SIZE_64;
1079
1080 uint32_t x_seg_idx;
1081 uint32_t y_seg_idx;
1082 SEGMENT_CONVERT_IDX_TO_XY(
1083 segment_index, x_seg_idx, y_seg_idx, ppcs_ptr->first_pass_seg_column_count);
1084 uint32_t x_b64_start_idx = SEGMENT_START_IDX(
1085 x_seg_idx, picture_width_in_b64, ppcs_ptr->first_pass_seg_column_count);
1086 uint32_t x_b64_end_idx = SEGMENT_END_IDX(
1087 x_seg_idx, picture_width_in_b64, ppcs_ptr->first_pass_seg_column_count);
1088 uint32_t y_b64_start_idx = SEGMENT_START_IDX(
1089 y_seg_idx, picture_height_in_b64, ppcs_ptr->first_pass_seg_row_count);
1090 uint32_t y_b64_end_idx = SEGMENT_END_IDX(
1091 y_seg_idx, picture_height_in_b64, ppcs_ptr->first_pass_seg_row_count);
1092
1093 const uint32_t blk_index_y_end = (y_b64_end_idx * blks_in_b64) > blk_rows ? blk_rows : (y_b64_end_idx * blks_in_b64);
1094 const uint32_t blk_index_x_end = (x_b64_end_idx * blks_in_b64) > blk_cols ? blk_cols : (x_b64_end_idx * blks_in_b64);
1095 EbSpatialFullDistType spatial_full_dist_type_fun = svt_spatial_full_distortion_kernel;
1096
1097 for (uint32_t blk_index_y = (y_b64_start_idx * blks_in_b64); blk_index_y < blk_index_y_end; blk_index_y++) {
1098 for (uint32_t blk_index_x = (x_b64_start_idx * blks_in_b64); blk_index_x < blk_index_x_end; blk_index_x++) {
1099 blk_origin_x = blk_index_x * FORCED_BLK_SIZE;
1100 blk_origin_y = blk_index_y * FORCED_BLK_SIZE;
1101 me_sb_x = blk_origin_x / me_sb_size;
1102 me_sb_y = blk_origin_y / me_sb_size;
1103 me_sb_addr = me_sb_x + me_sb_y * me_pic_width_in_sb;
1104
1105 blk_width = (ppcs_ptr->aligned_width - blk_origin_x) < FORCED_BLK_SIZE
1106 ? ppcs_ptr->aligned_width - blk_origin_x
1107 : FORCED_BLK_SIZE;
1108 blk_height = (ppcs_ptr->aligned_height - blk_origin_y) < FORCED_BLK_SIZE
1109 ? ppcs_ptr->aligned_height - blk_origin_y
1110 : FORCED_BLK_SIZE;
1111
1112 input_origin_index = (input_picture_ptr->origin_y + blk_origin_y) *
1113 input_picture_ptr->stride_y +
1114 (input_picture_ptr->origin_x + blk_origin_x);
1115
1116 FRAME_STATS *mb_stats = ppcs_ptr->firstpass_data.mb_stats + blk_index_y * blk_cols +
1117 blk_index_x;
1118
1119 if (ppcs_ptr->first_pass_ref_count)
1120 ppcs_ptr->firstpass_data.raw_motion_err_list[blk_index_y * blk_cols + blk_index_x] =
1121 (uint32_t)(spatial_full_dist_type_fun(input_picture_ptr->buffer_y,
1122 input_origin_index,
1123 input_picture_ptr->stride_y,
1124 last_input_picture_ptr->buffer_y,
1125 input_origin_index,
1126 input_picture_ptr->stride_y,
1127 blk_width,
1128 blk_height));
1129
1130 int this_intra_error = open_loop_firstpass_intra_prediction(ppcs_ptr, ppcs_ptr->firstpass_data.raw_motion_err_list[blk_index_y * blk_cols + blk_index_x] ,blk_origin_x,
1131 blk_origin_y,
1132 blk_width,
1133 blk_height,
1134 input_picture_ptr,
1135 input_origin_index,
1136 mb_stats);
1137 int this_inter_error = this_intra_error;
1138
1139 if (blk_origin_x == 0)
1140 last_mv = first_top_mv;
1141
1142 if (ppcs_ptr->first_pass_ref_count) {
1143 this_inter_error = open_loop_firstpass_inter_prediction(
1144 ppcs_ptr,
1145 me_sb_addr,
1146 blk_origin_x,
1147 blk_origin_y,
1148 blk_width,
1149 blk_height,
1150 input_picture_ptr,
1151 input_origin_index,
1152 this_intra_error,
1153 &last_mv,
1154 ppcs_ptr->firstpass_data
1155 .raw_motion_err_list[blk_index_y * blk_cols + blk_index_x],
1156 mb_stats);
1157
1158 if (blk_origin_x == 0)
1159 first_top_mv = last_mv;
1160
1161 mb_stats->coded_error += this_inter_error;
1162 } else {
1163 mb_stats->sr_coded_error += this_intra_error;
1164 mb_stats->tr_coded_error += this_intra_error;
1165 mb_stats->coded_error += this_intra_error;
1166 }
1167 }
1168 }
1169
1170 return EB_ErrorNone;
1171 }
1172 /***************************************************************************
1173 * Prepare the me context for performing first pass me.
1174 ***************************************************************************/
first_pass_setup_me_context(MotionEstimationContext_t * context_ptr,PictureParentControlSet * ppcs_ptr,EbPictureBufferDesc * input_picture_ptr,int blk_row,int blk_col,uint32_t ss_x,uint32_t ss_y)1175 static void first_pass_setup_me_context(MotionEstimationContext_t *context_ptr,
1176 PictureParentControlSet * ppcs_ptr,
1177 EbPictureBufferDesc *input_picture_ptr, int blk_row,
1178 int blk_col, uint32_t ss_x, uint32_t ss_y) {
1179 // setup the references
1180 context_ptr->me_context_ptr->num_of_list_to_search = 0;
1181 context_ptr->me_context_ptr->num_of_ref_pic_to_search[0] = 0;
1182 context_ptr->me_context_ptr->num_of_ref_pic_to_search[1] = 0;
1183 context_ptr->me_context_ptr->temporal_layer_index = 0;
1184 context_ptr->me_context_ptr->is_used_as_reference_flag = 1;
1185
1186 if (ppcs_ptr->first_pass_ref_count) {
1187 context_ptr->me_context_ptr->me_ds_ref_array[0][0] =
1188 ppcs_ptr->first_pass_ref_ppcs_ptr[0]->ds_pics;
1189 context_ptr->me_context_ptr->num_of_ref_pic_to_search[0]++;
1190 }
1191 if (ppcs_ptr->first_pass_ref_count > 1) {
1192 context_ptr->me_context_ptr->me_ds_ref_array[0][1] =
1193 ppcs_ptr->first_pass_ref_ppcs_ptr[1]->ds_pics;
1194 context_ptr->me_context_ptr->num_of_ref_pic_to_search[0]++;
1195 }
1196
1197 context_ptr->me_context_ptr->me_type = ME_FIRST_PASS;
1198 // Set 1/4 and 1/16 ME reference buffer(s); filtered or decimated
1199 EbPictureBufferDesc *quarter_pic_ptr = ppcs_ptr->ds_pics.quarter_picture_ptr;
1200
1201 EbPictureBufferDesc *sixteenth_pic_ptr = ppcs_ptr->ds_pics.sixteenth_picture_ptr;
1202 // Parts from MotionEstimationKernel()
1203 uint32_t sb_origin_x = (uint32_t)(blk_col * BLOCK_SIZE_64);
1204 uint32_t sb_origin_y = (uint32_t)(blk_row * BLOCK_SIZE_64);
1205
1206 // Load the SB from the input to the intermediate SB buffer
1207 int buffer_index = (input_picture_ptr->origin_y + sb_origin_y) * input_picture_ptr->stride_y +
1208 input_picture_ptr->origin_x + sb_origin_x;
1209
1210 // set search method
1211 context_ptr->me_context_ptr->hme_search_method = SUB_SAD_SEARCH;
1212
1213 #ifdef ARCH_X86_64
1214 uint8_t *src_ptr = &(input_picture_ptr->buffer_y[buffer_index]);
1215
1216 uint32_t sb_height = (input_picture_ptr->height - sb_origin_y) < BLOCK_SIZE_64
1217 ? input_picture_ptr->height - sb_origin_y
1218 : BLOCK_SIZE_64;
1219 //_MM_HINT_T0 //_MM_HINT_T1 //_MM_HINT_T2 //_MM_HINT_NTA
1220 uint32_t i;
1221 for (i = 0; i < sb_height; i++) {
1222 char const *p = (char const *)(src_ptr + i * input_picture_ptr->stride_y);
1223 _mm_prefetch(p, _MM_HINT_T2);
1224 }
1225 #endif
1226 context_ptr->me_context_ptr->sb_src_ptr = &(input_picture_ptr->buffer_y[buffer_index]);
1227 context_ptr->me_context_ptr->sb_src_stride = input_picture_ptr->stride_y;
1228
1229 // Load the 1/4 decimated SB from the 1/4 decimated input to the 1/4 intermediate SB buffer
1230 buffer_index = (quarter_pic_ptr->origin_y + (sb_origin_y >> ss_y)) * quarter_pic_ptr->stride_y +
1231 quarter_pic_ptr->origin_x + (sb_origin_x >> ss_x);
1232
1233 context_ptr->me_context_ptr->quarter_sb_buffer = &quarter_pic_ptr->buffer_y[buffer_index];
1234 context_ptr->me_context_ptr->quarter_sb_buffer_stride = quarter_pic_ptr->stride_y;
1235
1236 // Load the 1/16 decimated SB from the 1/16 decimated input to the 1/16 intermediate SB buffer
1237 buffer_index = (sixteenth_pic_ptr->origin_y + (sb_origin_y >> 2)) *
1238 sixteenth_pic_ptr->stride_y +
1239 sixteenth_pic_ptr->origin_x + (sb_origin_x >> 2);
1240
1241 context_ptr->me_context_ptr->sixteenth_sb_buffer = &sixteenth_pic_ptr->buffer_y[buffer_index];
1242 context_ptr->me_context_ptr->sixteenth_sb_buffer_stride = sixteenth_pic_ptr->stride_y;
1243 }
1244 /***************************************************************************
1245 * Perform the motion estimation for first pass.
1246 ***************************************************************************/
first_pass_me(PictureParentControlSet * ppcs_ptr,MotionEstimationContext_t * me_context_ptr,int32_t segment_index)1247 static EbErrorType first_pass_me(PictureParentControlSet * ppcs_ptr,
1248 MotionEstimationContext_t *me_context_ptr, int32_t segment_index) {
1249 EbPictureBufferDesc *input_picture_ptr = ppcs_ptr->enhanced_picture_ptr;
1250
1251 uint32_t blk_cols = (uint32_t)(input_picture_ptr->width + BLOCK_SIZE_64 - 1) / BLOCK_SIZE_64;
1252 uint32_t blk_rows = (uint32_t)(input_picture_ptr->height + BLOCK_SIZE_64 - 1) / BLOCK_SIZE_64;
1253 uint32_t ss_x = ppcs_ptr->scs_ptr->subsampling_x;
1254 uint32_t ss_y = ppcs_ptr->scs_ptr->subsampling_y;
1255
1256 MeContext *context_ptr = me_context_ptr->me_context_ptr;
1257
1258 uint32_t x_seg_idx;
1259 uint32_t y_seg_idx;
1260 uint32_t picture_width_in_b64 = blk_cols;
1261 uint32_t picture_height_in_b64 = blk_rows;
1262 SEGMENT_CONVERT_IDX_TO_XY(
1263 segment_index, x_seg_idx, y_seg_idx, ppcs_ptr->first_pass_seg_column_count);
1264 uint32_t x_b64_start_idx = SEGMENT_START_IDX(
1265 x_seg_idx, picture_width_in_b64, ppcs_ptr->first_pass_seg_column_count);
1266 uint32_t x_b64_end_idx = SEGMENT_END_IDX(
1267 x_seg_idx, picture_width_in_b64, ppcs_ptr->first_pass_seg_column_count);
1268 uint32_t y_b64_start_idx = SEGMENT_START_IDX(
1269 y_seg_idx, picture_height_in_b64, ppcs_ptr->first_pass_seg_row_count);
1270 uint32_t y_b64_end_idx = SEGMENT_END_IDX(
1271 y_seg_idx, picture_height_in_b64, ppcs_ptr->first_pass_seg_row_count);
1272
1273 for (uint32_t blk_row = y_b64_start_idx; blk_row < y_b64_end_idx; blk_row++) {
1274 for (uint32_t blk_col = x_b64_start_idx; blk_col < x_b64_end_idx; blk_col++) {
1275 // Initialize ME context
1276 first_pass_setup_me_context(
1277 me_context_ptr, ppcs_ptr, input_picture_ptr, blk_row, blk_col, ss_x, ss_y);
1278 // Perform ME - context_ptr will store the outputs (MVs, buffers, etc)
1279 // Block-based MC using open-loop HME + refinement
1280 motion_estimate_sb(
1281 ppcs_ptr, // source picture control set -> references come from here
1282 (uint32_t)blk_row * blk_cols + blk_col,
1283 (uint32_t)blk_col * BLOCK_SIZE_64, // x block
1284 (uint32_t)blk_row * BLOCK_SIZE_64, // y block
1285 context_ptr,
1286 input_picture_ptr); // source picture
1287 }
1288 }
1289 return EB_ErrorNone;
1290 }
1291
1292 /************************************************************************************
1293 * Performs the first pass based on open loop data.
1294 * Source frames are used for Intra and Inter prediction.
1295 * ME is done per segment but the remaining parts performed per frame.
1296 ************************************************************************************/
open_loop_first_pass(PictureParentControlSet * ppcs_ptr,MotionEstimationContext_t * me_context_ptr,int32_t segment_index)1297 void open_loop_first_pass(PictureParentControlSet * ppcs_ptr,
1298 MotionEstimationContext_t *me_context_ptr, int32_t segment_index) {
1299 me_context_ptr->me_context_ptr->min_frame_size = MIN(ppcs_ptr->aligned_height,
1300 ppcs_ptr->aligned_width);
1301 // Perform the me for the first pass for each segment
1302 if (ppcs_ptr->scs_ptr->enc_mode_2ndpass <= ENC_M4)
1303 ppcs_ptr->skip_frame =0;
1304 else {
1305 if (ppcs_ptr->picture_number > 3 && ppcs_ptr->picture_number % 4 > 0)
1306 ppcs_ptr->skip_frame = 1;
1307 else
1308 ppcs_ptr->skip_frame = 0;
1309 }
1310
1311 if (!ppcs_ptr->skip_frame)
1312 if (ppcs_ptr->first_pass_ref_count)
1313 first_pass_me(ppcs_ptr, me_context_ptr, segment_index);
1314
1315 if (!ppcs_ptr->skip_frame){
1316 setup_firstpass_data_seg(ppcs_ptr, segment_index);
1317 // Perform the processing of the segment for each frame after me is done for all blocks
1318 first_pass_frame_seg(ppcs_ptr, segment_index);
1319 }
1320 svt_block_on_mutex(ppcs_ptr->first_pass_mutex);
1321 ppcs_ptr->first_pass_seg_acc++;
1322 if (ppcs_ptr->first_pass_seg_acc == ppcs_ptr->first_pass_seg_total_count) {
1323
1324 first_pass_frame_end(ppcs_ptr, ppcs_ptr->ts_duration);
1325 if (ppcs_ptr->end_of_sequence_flag && !ppcs_ptr->scs_ptr->lap_enabled)
1326 svt_av1_end_first_pass(ppcs_ptr);
1327 // Signal that the first pass is done
1328 svt_post_semaphore(ppcs_ptr->first_pass_done_semaphore);
1329 }
1330
1331 svt_release_mutex(ppcs_ptr->first_pass_mutex);
1332 }
1333