1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <limits.h>
13
14 #include "av1/common/reconintra.h"
15
16 #include "av1/encoder/encoder.h"
17 #include "av1/encoder/speed_features.h"
18 #include "av1/encoder/rdopt.h"
19
20 #include "aom_dsp/aom_dsp_common.h"
21
22 #define MAX_MESH_SPEED 5 // Max speed setting for mesh motion method
23 // Max speed setting for tx domain evaluation
24 #define MAX_TX_DOMAIN_EVAL_SPEED 5
25 static MESH_PATTERN
26 good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
27 { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
28 { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
29 { { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } },
30 { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
31 { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
32 { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
33 };
34
35 // TODO(huisu@google.com): These settings are pretty relaxed, tune them for
36 // each speed setting
37 static MESH_PATTERN intrabc_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
38 { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
39 { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
40 { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
41 { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
42 { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
43 { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
44 };
45
46 // Threshold values to be used for pruning the txfm_domain_distortion
47 // based on block MSE
48 // Index 0: Default mode evaluation, Winner mode processing is not
49 // applicable (Eg : IntraBc). Index 1: Mode evaluation.
50 // Index 2: Winner mode evaluation. Index 1 and 2 are applicable when
51 // enable_winner_mode_for_use_tx_domain_dist speed feature is ON
52 // TODO(any): Experiment the threshold logic based on variance metric
53 static unsigned int tx_domain_dist_thresholds[3][MODE_EVAL_TYPES] = {
54 { UINT_MAX, UINT_MAX, UINT_MAX }, { 22026, 22026, 22026 }, { 0, 0, 0 }
55 };
56
57 // Transform domain distortion type to be used for default, mode and winner mode
58 // evaluation Index 0: Default mode evaluation, Winner mode processing is not
59 // applicable (Eg : IntraBc). Index 1: Mode evaluation. Index 2: Winner mode
60 // evaluation. Index 1 and 2 are applicable when
61 // enable_winner_mode_for_use_tx_domain_dist speed feature is ON
62 static unsigned int tx_domain_dist_types[3][MODE_EVAL_TYPES] = { { 0, 2, 0 },
63 { 1, 2, 0 },
64 { 2, 2, 0 } };
65
66 // Threshold values to be used for disabling coeff RD-optimization
67 // based on block MSE / qstep^2.
68 // TODO(any): Experiment the threshold logic based on variance metric.
69 // Table has satd and dist threshold value index 0 : dist,index 1: satd
70 // For each row, the indices are as follows.
71 // Index 0: Default mode evaluation, Winner mode processing is not applicable
72 // (Eg : IntraBc)
73 // Index 1: Mode evaluation.
74 // Index 2: Winner mode evaluation.
75 // Index 1 and 2 are applicable when enable_winner_mode_for_coeff_opt speed
76 // feature is ON
77 // There are 7 levels with increasing speed, mapping to vertical indices.
78 static unsigned int coeff_opt_thresholds[9][MODE_EVAL_TYPES][2] = {
79 { { UINT_MAX, UINT_MAX }, { UINT_MAX, UINT_MAX }, { UINT_MAX, UINT_MAX } },
80 { { 3200, UINT_MAX }, { 250, UINT_MAX }, { UINT_MAX, UINT_MAX } },
81 { { 1728, UINT_MAX }, { 142, UINT_MAX }, { UINT_MAX, UINT_MAX } },
82 { { 864, UINT_MAX }, { 142, UINT_MAX }, { UINT_MAX, UINT_MAX } },
83 { { 432, UINT_MAX }, { 86, UINT_MAX }, { UINT_MAX, UINT_MAX } },
84 { { 864, 97 }, { 142, 16 }, { UINT_MAX, UINT_MAX } },
85 { { 432, 97 }, { 86, 16 }, { UINT_MAX, UINT_MAX } },
86 { { 216, 25 }, { 86, 10 }, { UINT_MAX, UINT_MAX } },
87 { { 216, 25 }, { 0, 10 }, { UINT_MAX, UINT_MAX } }
88 };
89
90 // Transform size to be used for default, mode and winner mode evaluation
91 // Index 0: Default mode evaluation, Winner mode processing is not applicable
92 // (Eg : IntraBc) Index 1: Mode evaluation. Index 2: Winner mode evaluation.
93 // Index 1 and 2 are applicable when enable_winner_mode_for_tx_size_srch speed
94 // feature is ON
95 static TX_SIZE_SEARCH_METHOD tx_size_search_methods[3][MODE_EVAL_TYPES] = {
96 { USE_FULL_RD, USE_LARGESTALL, USE_FULL_RD },
97 { USE_FAST_RD, USE_LARGESTALL, USE_FULL_RD },
98 { USE_LARGESTALL, USE_LARGESTALL, USE_FULL_RD }
99 };
100
101 // Predict transform skip levels to be used for default, mode and winner mode
102 // evaluation. Index 0: Default mode evaluation, Winner mode processing is not
103 // applicable. Index 1: Mode evaluation, Index 2: Winner mode evaluation
104 // Values indicate the aggressiveness of skip flag prediction.
105 // 0 : no early skip prediction
106 // 1 : conservative early skip prediction using DCT_DCT
107 // 2 : early skip prediction based on SSE
108 static unsigned int predict_skip_levels[3][MODE_EVAL_TYPES] = { { 0, 0, 0 },
109 { 1, 1, 1 },
110 { 1, 2, 1 } };
111
112 // Predict DC block levels to be used for default, mode and winner mode
113 // evaluation. Index 0: Default mode evaluation, Winner mode processing is not
114 // applicable. Index 1: Mode evaluation, Index 2: Winner mode evaluation
115 // Values indicate the aggressiveness of skip flag prediction.
116 // 0 : no early DC block prediction
117 // 1 : Early DC block prediction based on error variance
118 static unsigned int predict_dc_levels[3][MODE_EVAL_TYPES] = { { 0, 0, 0 },
119 { 1, 1, 0 },
120 { 1, 1, 1 } };
121
122 // This table holds the maximum number of reference frames for global motion.
123 // The table is indexed as per the speed feature 'gm_search_type'.
124 // 0 : All reference frames are allowed.
125 // 1 : All reference frames except L2 and L3 are allowed.
126 // 2 : All reference frames except L2, L3 and ARF2 are allowed.
127 // 3 : No reference frame is allowed.
128 static int gm_available_reference_frames[GM_DISABLE_SEARCH + 1] = {
129 INTER_REFS_PER_FRAME, INTER_REFS_PER_FRAME - 2, INTER_REFS_PER_FRAME - 3, 0
130 };
131
132 // Qindex threshold levels used for selecting full-pel motion search.
133 // ms_qthresh[i][j][k] indicates the qindex boundary value for 'k'th qindex band
134 // for resolution index 'j' for aggressiveness level 'i'.
135 // Aggressiveness increases from i = 0 to 2.
136 // j = 0: lower than 720p resolution, j = 1: 720p or larger resolution.
137 // Currently invoked only for speed 0, 1 and 2.
138 static int ms_qindex_thresh[3][2][2] = { { { 200, 70 }, { MAXQ, 200 } },
139 { { 170, 50 }, { MAXQ, 200 } },
140 { { 170, 40 }, { 200, 40 } } };
141
142 // Full-pel search methods for aggressive search based on qindex.
143 // Index 0 is for resolutions lower than 720p, index 1 for 720p or larger
144 // resolutions. Currently invoked only for speed 1 and 2.
145 static SEARCH_METHODS motion_search_method[2] = { CLAMPED_DIAMOND, DIAMOND };
146
147 // Intra only frames, golden frames (except alt ref overlays) and
148 // alt ref frames tend to be coded at a higher than ambient quality
frame_is_boosted(const AV1_COMP * cpi)149 static int frame_is_boosted(const AV1_COMP *cpi) {
150 return frame_is_kf_gf_arf(cpi);
151 }
152
// Writes the ML partition-search breakout thresholds. Only the BLOCK_64X64
// entry differs between callers; BLOCK_128X128 is always set to -1.
static void set_ml_partition_breakout_thresh(SPEED_FEATURES *const sf,
                                             int thresh_64x64) {
  sf->part_sf.ml_partition_search_breakout_thresh[0] = 200;  // BLOCK_8X8
  sf->part_sf.ml_partition_search_breakout_thresh[1] = 250;  // BLOCK_16X16
  sf->part_sf.ml_partition_search_breakout_thresh[2] = 300;  // BLOCK_32X32
  sf->part_sf.ml_partition_search_breakout_thresh[3] =
      thresh_64x64;                                          // BLOCK_64X64
  sf->part_sf.ml_partition_search_breakout_thresh[4] = -1;   // BLOCK_128X128
}

// Sets the good-quality speed features that depend on the frame size.
//
// cpi:   encoder instance; only the frame dimensions in cpi->common and
//        cpi->oxcf.use_highbitdepth are read.
// sf:    speed feature set, updated in place.
// speed: speed preset. The "speed >= N" tiers run in increasing order, so a
//        higher tier overrides values written by the lower ones.
static void set_good_speed_feature_framesize_dependent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  // Resolution classes are decided by the smaller of the two dimensions.
  const int min_dim = AOMMIN(cm->width, cm->height);
  const int at_least_480p = min_dim >= 480;
  const int at_least_720p = min_dim >= 720;
  const int at_least_1080p = min_dim >= 1080;
  const int at_least_4k = min_dim >= 2160;
  const bool high_bitdepth = cpi->oxcf.use_highbitdepth;

  // Settings applied at every speed (speed 0 baseline).
  if (!at_least_480p) {
    sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
    sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED;
    if (high_bitdepth) sf->tx_sf.prune_tx_size_level = 1;
  } else {
    sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128;
    sf->part_sf.auto_max_partition_based_on_simple_motion =
        at_least_720p ? ADAPT_PRED : RELAXED_PRED;
  }

  if (at_least_4k) {
    sf->part_sf.default_min_partition_size = BLOCK_8X8;
  }

  if (at_least_720p) {
    // TODO(chiyotsai@google.com): make this speed feature adaptive based on
    // current block's vertical texture instead of hardcoded with resolution
    sf->mv_sf.use_downsampled_sad = 1;
  } else {
    // TODO(huisu@google.com): train models for 720P and above.
    set_ml_partition_breakout_thresh(sf, 500);
    sf->part_sf.ml_early_term_after_part_split_level = 1;
  }

  if (speed >= 1) {
    sf->part_sf.use_square_partition_only_threshold =
        at_least_720p ? BLOCK_128X128
                      : (at_least_480p ? BLOCK_64X64 : BLOCK_32X32);

    if (!at_least_720p) {
      set_ml_partition_breakout_thresh(sf, 300);
    }
    sf->part_sf.ml_early_term_after_part_split_level = 2;
  }

  if (speed >= 2) {
    // Below 720p the threshold is BLOCK_32X32 regardless of the 480p split.
    sf->part_sf.use_square_partition_only_threshold =
        at_least_720p ? BLOCK_64X64 : BLOCK_32X32;

    if (at_least_720p) {
      sf->part_sf.partition_search_breakout_dist_thr = (1 << 24);
      sf->part_sf.partition_search_breakout_rate_thr = 120;
      sf->inter_sf.prune_obmc_prob_thresh = 16;
    } else {
      sf->part_sf.partition_search_breakout_dist_thr = (1 << 22);
      sf->part_sf.partition_search_breakout_rate_thr = 100;
      sf->inter_sf.prune_obmc_prob_thresh = 8;
    }

    if (at_least_480p) {
      sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 1;
    }
    if (high_bitdepth) {
      sf->tx_sf.prune_tx_size_level = at_least_480p ? 2 : 3;
    }
  }

  if (speed >= 3) {
    sf->part_sf.ml_early_term_after_part_split_level = 0;

    if (at_least_720p) {
      sf->part_sf.partition_search_breakout_dist_thr = (1 << 25);
      sf->part_sf.partition_search_breakout_rate_thr = 200;
    } else {
      sf->part_sf.max_intra_bsize = BLOCK_32X32;
      sf->part_sf.partition_search_breakout_dist_thr = (1 << 23);
      sf->part_sf.partition_search_breakout_rate_thr = 120;
    }
    if (high_bitdepth) sf->tx_sf.prune_tx_size_level = 3;
  }

  if (speed >= 4) {
    sf->part_sf.partition_search_breakout_dist_thr =
        at_least_720p ? (1 << 26) : (1 << 24);

    if (at_least_480p) {
      sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 2;
    }

    sf->inter_sf.prune_obmc_prob_thresh = INT_MAX;
  }

  if (speed >= 5) {
    // Below 480p the warped-motion pruning threshold is left untouched.
    if (at_least_720p) {
      sf->inter_sf.prune_warped_prob_thresh = 16;
    } else if (at_least_480p) {
      sf->inter_sf.prune_warped_prob_thresh = 8;
    }
  }

  if (speed >= 6) {
    if (at_least_720p) {
      sf->part_sf.auto_max_partition_based_on_simple_motion = NOT_IN_USE;
      sf->inter_sf.disable_masked_comp = 1;
      sf->inter_sf.prune_ref_mv_idx_search = 2;
    } else {
      if (at_least_480p) {
        sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED;
      }
      sf->inter_sf.mv_cost_upd_level = 2;
      sf->inter_sf.prune_ref_mv_idx_search = 1;
    }

    if (at_least_1080p) {
      sf->part_sf.default_min_partition_size = BLOCK_8X8;
    }

    // TODO(yunqing): use BLOCK_32X32 for >= 4k.
    if (at_least_4k) {
      sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
    } else if (at_least_720p) {
      sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32;
    } else {
      sf->part_sf.use_square_partition_only_threshold = BLOCK_16X16;
    }
  }
}
315
// Sets the real-time speed features that depend on the frame size.
//
// cpi:   encoder instance; the frame dimensions in cpi->common and
//        cpi->use_svc are read.
// sf:    speed feature set, updated in place.
// speed: speed preset. Within each resolution branch the checks run in
//        increasing speed order, so a higher tier overrides a lower one.
static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
                                                     SPEED_FEATURES *const sf,
                                                     int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  // Resolution classes are decided by the smaller of the two dimensions.
  const int min_dim = AOMMIN(cm->width, cm->height);
  const int is_480p_or_larger = min_dim >= 480;
  const int is_360p_or_larger = min_dim >= 360;

  if (!is_360p_or_larger) {
    if (speed >= 6) sf->rt_sf.force_tx_search_off = 1;
    if (speed >= 8) {
      sf->rt_sf.use_modeled_non_rd_cost = 0;
      sf->rt_sf.use_nonrd_filter_search = 0;
    }
    if (speed >= 9) {
      // Overrides the speed 8 value written just above.
      sf->rt_sf.use_modeled_non_rd_cost = 1;
      sf->rt_sf.nonrd_agressive_skip = 1;
// TODO(kyslov) Re-enable when AV1 models are trained
#if 0
#if CONFIG_RT_ML_PARTITIONING
      if (!frame_is_intra_only(cm)) {
        sf->part_sf.partition_search_type = ML_BASED_PARTITION;
        sf->rt_sf.reuse_inter_pred_nonrd = 0;
      }
#endif
#endif
    }
  } else {
    // Applies to speed 8 exactly, and only when SVC is not in use.
    if (speed == 8 && !cpi->use_svc) {
      sf->rt_sf.short_circuit_low_temp_var = 0;
      sf->rt_sf.use_nonrd_altref_frame = 1;
    }
  }

  if (!is_480p_or_larger) {
    if (speed == 7) {
      sf->rt_sf.nonrd_check_partition_merge_mode = 2;
    }
    if (speed >= 8) {
      sf->mv_sf.subpel_search_method = SUBPEL_TREE;
      sf->rt_sf.estimate_motion_for_var_based_partition = 1;
    }
    if (speed >= 9) {
      // Overrides both speed 8 values written just above.
      sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
      sf->rt_sf.estimate_motion_for_var_based_partition = 0;
    }
  }
}
365
// Sets the good-quality speed features that do not depend on the frame size.
//
// cpi:   encoder instance; reads cpi->common, cpi->gf_group, cpi->oxcf,
//        cpi->twopass, cpi->rc and cpi->use_screen_content_tools.
// sf:    speed feature set, updated in place.
// speed: speed preset. The unconditional block holds the speed 0 settings;
//        each "speed >= N" tier then runs in increasing order, so a higher
//        tier overrides values written by the lower ones.
static void set_good_speed_features_framesize_independent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  const GF_GROUP *const gf_group = &cpi->gf_group;
  const int is_boosted = frame_is_boosted(cpi);
  // True for boosted frames and for internal ARF updates.
  const int is_boosted_or_intnl_arf =
      is_boosted ||
      gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE;
  // 0 for boosted frames, 1 for internal ARF updates, 2 for all other frames.
  const int frame_class_level =
      is_boosted ? 0 : (is_boosted_or_intnl_arf ? 1 : 2);
  const int is_intra_only = frame_is_intra_only(cm);
  const int screen_content = cm->features.allow_screen_content_tools;
  const int hbd = cpi->oxcf.use_highbitdepth;

  if (!cpi->oxcf.tile_cfg.enable_large_scale_tile) {
    sf->hl_sf.high_precision_mv_usage = LAST_MV_DATA;
  }

  // Speed 0 for all speed features that give neutral coding performance change.
  sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_DUAL;

  sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3;

  sf->part_sf.less_rectangular_check_level = 1;
  sf->part_sf.ml_prune_partition = 1;
  sf->part_sf.prune_ext_partition_types_search_level = 1;
  sf->part_sf.prune_part4_search = 2;
  sf->part_sf.simple_motion_search_prune_rect = 1;
  sf->part_sf.ml_predict_breakout_level = hbd ? 1 : 3;

  // TODO(debargha): Test, tweak and turn on either 1 or 2
  sf->inter_sf.inter_mode_rd_model_estimation = 1;
  sf->inter_sf.model_based_post_interp_filter_breakout = 1;
  sf->inter_sf.prune_compound_using_single_ref = 1;
  sf->inter_sf.prune_mode_search_simple_translation = 1;
  // Disabled (0) for boosted frames and for screen content.
  sf->inter_sf.prune_ref_frame_for_rect_partitions =
      screen_content ? 0 : frame_class_level;
  sf->inter_sf.reduce_inter_modes = is_boosted ? 1 : 2;
  sf->inter_sf.selective_ref_frame = 1;
  sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_SKIP_MV_SEARCH;

  sf->interp_sf.use_fast_interpolation_filter_search = 1;

  sf->intra_sf.intra_pruning_with_hog = 1;

  sf->tx_sf.adaptive_txb_search_level = 1;
  sf->tx_sf.intra_tx_size_search_init_depth_sqr = 1;
  sf->tx_sf.model_based_prune_tx_search_level = 1;
  sf->tx_sf.tx_type_search.use_reduced_intra_txset = 1;

  sf->tpl_sf.search_method = NSTEP_8PT;

  sf->rt_sf.use_nonrd_pick_mode = 0;
  sf->rt_sf.use_real_time_ref_set = 0;

  // Graphics/animation and screen content get a lower threshold.
  if (cpi->twopass.fr_content_type != FC_GRAPHICS_ANIMATION &&
      !cpi->use_screen_content_tools) {
    sf->mv_sf.exhaustive_searches_thresh = (1 << 25);
  } else {
    sf->mv_sf.exhaustive_searches_thresh = (1 << 20);
  }

  sf->rd_sf.perform_coeff_opt = 1;

  if (speed >= 1) {
    sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2;
    sf->gm_sf.prune_ref_frame_for_gm_search = !is_boosted;

    sf->part_sf.intra_cnn_split = 1;
    sf->part_sf.simple_motion_search_early_term_none = 1;
    // TODO(Venkat): Clean-up frame type dependency for
    // simple_motion_search_split in partition search function and set the
    // speed feature accordingly
    sf->part_sf.simple_motion_search_split = screen_content ? 1 : 2;
    sf->part_sf.ml_predict_breakout_level = hbd ? 2 : 3;

    // Doubles whichever threshold the speed 0 block selected.
    sf->mv_sf.exhaustive_searches_thresh <<= 1;
    sf->mv_sf.obmc_full_pixel_search_level = 1;
    sf->mv_sf.use_accurate_subpel_search = USE_4_TAPS;
    sf->mv_sf.disable_extensive_joint_motion_search = 1;

    sf->inter_sf.disable_interinter_wedge_newmv_search = !is_boosted;
    sf->inter_sf.prune_comp_search_by_single_result = is_boosted ? 2 : 1;
    sf->inter_sf.prune_comp_type_by_comp_avg = 1;
    sf->inter_sf.prune_comp_type_by_model_rd = !is_boosted;
    if (is_intra_only || screen_content) {
      sf->inter_sf.prune_ref_frame_for_rect_partitions = 0;
    } else {
      sf->inter_sf.prune_ref_frame_for_rect_partitions = is_boosted ? 1 : 2;
    }
    sf->inter_sf.reduce_inter_modes = is_boosted ? 1 : 3;
    sf->inter_sf.reuse_inter_intra_mode = 1;
    sf->inter_sf.selective_ref_frame = 2;
    sf->inter_sf.skip_repeated_newmv = 1;

    sf->interp_sf.use_interp_filter = 1;

    sf->intra_sf.prune_palette_search_level = 1;

    sf->tx_sf.adaptive_txb_search_level = 2;
    sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
    sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
    sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
    sf->tx_sf.model_based_prune_tx_search_level = 0;
    sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2;
    sf->tx_sf.tx_type_search.skip_tx_search = 1;

    sf->rd_sf.perform_coeff_opt = is_boosted ? 2 : 3;
    sf->rd_sf.tx_domain_dist_level = is_boosted ? 1 : 2;
    sf->rd_sf.tx_domain_dist_thres_level = 1;

    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL1;
    sf->lpf_sf.dual_sgr_penalty_level = 1;
    sf->lpf_sf.enable_sgr_ep_pruning = 1;

    // TODO(any, yunqing): move this feature to speed 0.
    sf->tpl_sf.skip_alike_starting_mv = 1;
  }

  if (speed >= 2) {
    sf->part_sf.allow_partition_search_skip = 1;

    sf->mv_sf.auto_mv_step_size = 1;
    sf->mv_sf.subpel_iters_per_step = 1;

    // TODO(chiyotsai@google.com): We can get 10% speed up if we move
    // adaptive_rd_thresh to speed 1. But currently it performs poorly on some
    // clips (e.g. 5% loss on dinner_1080p). We need to examine the sequence a
    // bit more closely to figure out why.
    sf->inter_sf.adaptive_rd_thresh = 1;
    sf->inter_sf.comp_inter_joint_search_thresh = BLOCK_SIZES_ALL;
    sf->inter_sf.disable_interintra_wedge_var_thresh = 100;
    sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
    sf->inter_sf.fast_interintra_wedge_search = 1;
    sf->inter_sf.prune_comp_search_by_single_result = is_boosted ? 4 : 1;
    sf->inter_sf.prune_compound_using_neighbors = 1;
    sf->inter_sf.prune_comp_using_best_single_mode_ref = 2;
    sf->inter_sf.prune_comp_type_by_comp_avg = 2;
    sf->inter_sf.reuse_best_prediction_for_part_ab = 1;
    sf->inter_sf.selective_ref_frame = 3;
    sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;
    // Enable fast search only for COMPOUND_DIFFWTD type.
    sf->inter_sf.enable_fast_compound_mode_search = 1;

    // TODO(Sachin): Enable/Enhance this speed feature for speed 2 & 3
    sf->interp_sf.adaptive_interp_filter_search = 1;
    sf->interp_sf.disable_dual_filter = 1;

    sf->intra_sf.disable_smooth_intra =
        !is_intra_only || (cpi->rc.frames_to_key > 1);
    sf->intra_sf.intra_pruning_with_hog = 2;

    sf->rd_sf.perform_coeff_opt = is_boosted_or_intnl_arf ? 3 : 4;

    sf->lpf_sf.prune_wiener_based_on_src_var = 1;
    sf->lpf_sf.prune_sgr_based_on_wiener = 1;

    // TODO(any): Move this from speed 3 to speed 2 so that TPL multithread
    // is re-enabled at speed 2. This also makes encoder faster. After TPL MT is
    // fixed and works with compound pred, we can re-evaluate this feature.
    sf->tpl_sf.allow_compound_pred = 0;
    sf->tpl_sf.prune_ref_frames_in_tpl = 1;
  }

  if (speed >= 3) {
    sf->hl_sf.high_precision_mv_usage = CURRENT_Q;
    sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;

    sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH;

    sf->part_sf.less_rectangular_check_level = 2;
    sf->part_sf.simple_motion_search_prune_agg = 1;
    sf->part_sf.prune_ext_part_using_split_info = 1;

    // mv_sf.adaptive_motion_search is deliberately left unset here: it breaks
    // encoder multi-thread tests because the values in x->pred_mv[] differ
    // for single and multi-thread cases. See aomedia:1778.
    sf->mv_sf.full_pixel_search_level = 1;
    sf->mv_sf.simple_motion_subpel_force_stop = QUARTER_PEL;
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
    sf->mv_sf.search_method = DIAMOND;

    sf->inter_sf.mv_cost_upd_level = 1;
    // TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine
    // it with cpi->sf.disable_wedge_search_var_thresh.
    sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
    // TODO(any): Experiment with the early exit mechanism for speeds 0, 1 and 2
    // and clean-up the speed feature
    sf->inter_sf.perform_best_rd_based_gating_for_chroma = 1;
    sf->inter_sf.prune_inter_modes_based_on_tpl = !is_boosted;
    sf->inter_sf.prune_comp_search_by_single_result = is_boosted ? 4 : 2;
    sf->inter_sf.selective_ref_frame = 5;
    sf->inter_sf.skip_repeated_ref_mv = 1;
    sf->inter_sf.skip_repeated_full_newmv = 1;
    sf->inter_sf.reuse_compound_type_decision = 1;
    sf->inter_sf.txfm_rd_gate_level = frame_class_level;

    // TODO(chiyotsai@google.com): the thresholds chosen for intra hog are
    // inherited directly from luma hog with some minor tweaking. Eventually we
    // should run this with a bayesian optimizer to find the Pareto frontier.
    sf->intra_sf.chroma_intra_pruning_with_hog = 2;
    sf->intra_sf.intra_pruning_with_hog = 3;
    sf->intra_sf.prune_palette_search_level = 2;

    sf->tpl_sf.skip_alike_starting_mv = 2;
    sf->tpl_sf.prune_intra_modes = 1;
    sf->tpl_sf.prune_starting_mv = 1;
    sf->tpl_sf.reduce_first_step_size = 6;
    sf->tpl_sf.subpel_force_stop = QUARTER_PEL;
    sf->tpl_sf.search_method = DIAMOND;

    sf->tx_sf.adaptive_txb_search_level = is_boosted ? 2 : 3;
    sf->tx_sf.tx_type_search.use_skip_flag_prediction = 2;
    sf->tx_sf.use_intra_txb_hash = 1;

    // TODO(any): Refactor the code related to following winner mode speed
    // features
    sf->winner_mode_sf.enable_winner_mode_for_coeff_opt = 1;
    // TODO(any): Experiment with this speed feature by enabling for key frames
    sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = !is_intra_only;
    sf->winner_mode_sf.enable_winner_mode_for_use_tx_domain_dist = 1;
    sf->winner_mode_sf.motion_mode_for_winner_cand = frame_class_level;

    // TODO(any): evaluate if these lpf features can be moved to speed 2.
    // For screen content, "prune_sgr_based_on_wiener = 2" cause large quality
    // loss.
    sf->lpf_sf.prune_sgr_based_on_wiener = screen_content ? 1 : 2;
    sf->lpf_sf.disable_loop_restoration_chroma = !is_boosted;
    sf->lpf_sf.reduce_wiener_window_size = !is_boosted;
    sf->lpf_sf.prune_wiener_based_on_src_var = 2;
  }

  if (speed >= 4) {
    sf->part_sf.simple_motion_search_prune_agg = 2;
    sf->part_sf.simple_motion_search_reduce_search_steps = 4;
    sf->part_sf.prune_ext_part_using_split_info = 2;
    sf->part_sf.early_term_after_none_split = 1;
    sf->part_sf.ml_predict_breakout_level = 3;

    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
    sf->mv_sf.simple_motion_subpel_force_stop = HALF_PEL;
    sf->mv_sf.reduce_search_range = 1;

    sf->inter_sf.alt_ref_search_fp = 1;
    sf->inter_sf.txfm_rd_gate_level = is_boosted ? 0 : 4;
    sf->inter_sf.prune_inter_modes_based_on_tpl = is_boosted ? 0 : 2;
    sf->inter_sf.prune_compound_using_neighbors = 2;
    sf->inter_sf.prune_obmc_prob_thresh = INT_MAX;
    sf->inter_sf.disable_onesided_comp = 1;

    sf->interp_sf.cb_pred_filter_search = 1;
    sf->interp_sf.skip_sharp_interp_filter_search = 1;
    sf->interp_sf.use_interp_filter = 2;
    sf->interp_sf.adaptive_interp_filter_search = 2;

    // Restrict the intra mode masks for the three largest transform sizes.
    sf->intra_sf.intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
    sf->intra_sf.intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL;
    sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
    sf->intra_sf.intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL;
    sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
    sf->intra_sf.intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL;
    // TODO(any): Experiment with this speed feature set to 2 for higher quality
    // presets as well
    sf->intra_sf.skip_intra_in_interframe = 2;

    sf->tpl_sf.prune_starting_mv = 2;
    sf->tpl_sf.subpel_force_stop = HALF_PEL;
    sf->tpl_sf.search_method = FAST_BIGDIA;

    sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 1;
    sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;
    sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 1;
    // TODO(any): Experiment with enabling of this speed feature as hash state
    // is reset during winner mode processing
    sf->tx_sf.use_intra_txb_hash = 0;

    sf->rd_sf.perform_coeff_opt = is_boosted_or_intnl_arf ? 5 : 7;
    sf->rd_sf.tx_domain_dist_thres_level = 2;

    // TODO(any): Extend multi-winner mode processing support for inter frames
    if (is_intra_only) {
      sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_DEFAULT;
    } else {
      sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;
    }
    sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = 1;

    sf->lpf_sf.lpf_pick = LPF_PICK_FROM_FULL_IMAGE_NON_DUAL;
    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL3;
  }

  if (speed >= 5) {
    sf->part_sf.simple_motion_search_prune_agg = 3;
    sf->part_sf.ext_partition_eval_thresh =
        screen_content ? BLOCK_8X8 : BLOCK_16X16;

    sf->mv_sf.prune_mesh_search = 1;

    sf->inter_sf.disable_interinter_wedge_var_thresh = UINT_MAX;
    sf->inter_sf.prune_inter_modes_if_skippable = 1;
    sf->inter_sf.txfm_rd_gate_level = is_boosted ? 0 : 5;
    // Enable fast search for all valid compound modes.
    sf->inter_sf.enable_fast_compound_mode_search = 2;

    sf->intra_sf.chroma_intra_pruning_with_hog = 3;

    sf->tpl_sf.prune_starting_mv = 3;

    // TODO(any): Extend multi-winner mode processing support for inter frames
    if (is_intra_only) {
      sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_FAST;
    } else {
      sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;
    }
    sf->winner_mode_sf.dc_blk_pred_level = 1;

    sf->lpf_sf.use_coarse_filter_level_search = !is_intra_only;
    sf->lpf_sf.disable_lr_filter = 1;
  }

  if (speed >= 6) {
    sf->hl_sf.disable_extra_sc_testing = 1;
    sf->hl_sf.second_alt_ref_filtering = 0;
    sf->hl_sf.recode_tolerance = 55;

    sf->part_sf.prune_rectangular_split_based_on_qidx =
        !is_boosted && !screen_content;
    if (screen_content) {
      sf->part_sf.prune_sub_8x8_partition_level = 0;
    } else {
      sf->part_sf.prune_sub_8x8_partition_level = is_intra_only ? 1 : 2;
    }
    sf->part_sf.prune_part4_search = 3;

    sf->mv_sf.simple_motion_subpel_force_stop = FULL_PEL;
    sf->mv_sf.use_bsize_dependent_search_method = 1;

    sf->inter_sf.prune_inter_modes_based_on_tpl = is_boosted ? 0 : 3;
    sf->inter_sf.prune_nearmv_using_neighbors = 1;

    sf->intra_sf.chroma_intra_pruning_with_hog = 4;
    sf->intra_sf.intra_pruning_with_hog = 4;

    sf->tpl_sf.disable_gop_length_decision = 1;
    sf->tpl_sf.subpel_force_stop = FULL_PEL;
    sf->tpl_sf.disable_filtered_key_tpl = 1;

    sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2;
    sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 0;
    // The hash is turned back on here; multi-winner mode is switched off
    // below in the same tier.
    sf->tx_sf.use_intra_txb_hash = 1;

    sf->rd_sf.perform_coeff_opt = is_boosted_or_intnl_arf ? 6 : 8;

    sf->winner_mode_sf.dc_blk_pred_level = 2;
    sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;

    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4;
  }

  // Intra txb hash is currently not compatible with multi-winner mode as the
  // hashes got reset during multi-winner mode processing.
  assert(IMPLIES(
      sf->winner_mode_sf.multi_winner_mode_type != MULTI_WINNER_MODE_OFF,
      !sf->tx_sf.use_intra_txb_hash));
}
735
736 // TODO(kyslov): now this is very similar to
737 // set_good_speed_features_framesize_independent
738 // except it sets non-rd flag on speed8. This function will likely
739 // be modified in the future with RT-specific speed features
set_rt_speed_features_framesize_independent(AV1_COMP * cpi,SPEED_FEATURES * sf,int speed)740 static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
741 SPEED_FEATURES *sf,
742 int speed) {
743 AV1_COMMON *const cm = &cpi->common;
744 const int boosted = frame_is_boosted(cpi);
745
746 // Speed 0 for all speed features that give neutral coding performance change.
747 sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3;
748
749 sf->part_sf.less_rectangular_check_level = 1;
750 sf->part_sf.ml_prune_partition = 1;
751 sf->part_sf.prune_ext_partition_types_search_level = 1;
752
753 // TODO(debargha): Test, tweak and turn on either 1 or 2
754 sf->inter_sf.inter_mode_rd_model_estimation = 0;
755 sf->inter_sf.disable_interintra_wedge_var_thresh = 0;
756 sf->inter_sf.disable_interinter_wedge_var_thresh = 0;
757 sf->inter_sf.model_based_post_interp_filter_breakout = 1;
758 sf->inter_sf.prune_compound_using_single_ref = 0;
759 sf->inter_sf.prune_mode_search_simple_translation = 1;
760 sf->inter_sf.prune_ref_frame_for_rect_partitions = !boosted;
761 sf->inter_sf.reduce_inter_modes = 1;
762 sf->inter_sf.selective_ref_frame = 1;
763 sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_SKIP_MV_SEARCH;
764
765 sf->interp_sf.use_fast_interpolation_filter_search = 1;
766
767 sf->intra_sf.intra_pruning_with_hog = 1;
768
769 sf->mv_sf.full_pixel_search_level = 1;
770 sf->mv_sf.exhaustive_searches_thresh = INT_MAX;
771
772 sf->rt_sf.check_intra_pred_nonrd = 1;
773 sf->rt_sf.estimate_motion_for_var_based_partition = 1;
774 sf->rt_sf.hybrid_intra_pickmode = 0;
775 sf->rt_sf.nonrd_prune_ref_frame_search = 0;
776 sf->rt_sf.reuse_inter_pred_nonrd = 0;
777 sf->rt_sf.use_comp_ref_nonrd = 1;
778 sf->rt_sf.use_nonrd_filter_search = 1;
779 sf->rt_sf.use_nonrd_pick_mode = 0;
780 sf->rt_sf.use_real_time_ref_set = 0;
781 sf->rt_sf.check_scene_detection = 0;
782 sf->rt_sf.overshoot_detection_cbr = NO_DETECTION;
783 sf->tx_sf.adaptive_txb_search_level = 1;
784 sf->tx_sf.intra_tx_size_search_init_depth_sqr = 1;
785 sf->tx_sf.model_based_prune_tx_search_level = 1;
786 sf->tx_sf.tx_type_search.use_reduced_intra_txset = 1;
787 sf->rt_sf.fullpel_search_step_param = 0;
788 sf->rt_sf.skip_loopfilter_non_reference = 0;
789
790 sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_SOLO;
791
792 if (speed >= 1) {
793 sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2;
794
795 sf->part_sf.prune_ext_partition_types_search_level = 2;
796 sf->part_sf.simple_motion_search_prune_rect = 1;
797
798 sf->mv_sf.obmc_full_pixel_search_level = 1;
799 sf->mv_sf.use_accurate_subpel_search = USE_4_TAPS;
800
801 sf->inter_sf.prune_comp_search_by_single_result = 1;
802 sf->inter_sf.reuse_inter_intra_mode = 1;
803 sf->inter_sf.selective_ref_frame = 2;
804 sf->inter_sf.skip_repeated_newmv = 1;
805 sf->inter_sf.disable_interintra_wedge_var_thresh = 0;
806 sf->inter_sf.disable_interinter_wedge_var_thresh = 0;
807 sf->inter_sf.prune_comp_type_by_comp_avg = 1;
808
809 sf->interp_sf.cb_pred_filter_search = 1;
810 sf->interp_sf.use_interp_filter = 1;
811
812 sf->tx_sf.adaptive_txb_search_level = 2;
813 sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
814 sf->tx_sf.tx_size_search_lgr_block = 1;
815 sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
816 sf->tx_sf.tx_type_search.skip_tx_search = 1;
817 sf->tx_sf.use_intra_txb_hash = 1;
818
819 sf->rd_sf.tx_domain_dist_level = boosted ? 0 : 1;
820 sf->rd_sf.tx_domain_dist_thres_level = 1;
821
822 sf->lpf_sf.dual_sgr_penalty_level = 1;
823 }
824
825 if (speed >= 2) {
826 sf->part_sf.allow_partition_search_skip = 1;
827 sf->part_sf.partition_search_breakout_rate_thr = 80;
828
829 sf->mv_sf.auto_mv_step_size = 1;
830 sf->mv_sf.subpel_iters_per_step = 1;
831
832 sf->inter_sf.adaptive_rd_thresh = 1;
833 sf->inter_sf.comp_inter_joint_search_thresh = BLOCK_SIZES_ALL;
834 sf->inter_sf.disable_interintra_wedge_var_thresh = 100;
835 sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
836 sf->inter_sf.fast_wedge_sign_estimate = 1;
837 sf->inter_sf.prune_comp_type_by_comp_avg = 2;
838 sf->inter_sf.selective_ref_frame = 3;
839 sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;
840
841 sf->interp_sf.adaptive_interp_filter_search = 1;
842 sf->interp_sf.cb_pred_filter_search = 0;
843 sf->interp_sf.disable_dual_filter = 1;
844
845 sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
846 sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
847 sf->tx_sf.model_based_prune_tx_search_level = 0;
848
849 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL1;
850 }
851
852 if (speed >= 3) {
853 sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;
854
855 sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH;
856
857 sf->part_sf.less_rectangular_check_level = 2;
858
859 sf->mv_sf.use_accurate_subpel_search = USE_2_TAPS;
860 // adaptive_motion_search breaks encoder multi-thread tests.
861 // The values in x->pred_mv[] differ for single and multi-thread cases.
862 // See aomedia:1778.
863 // sf->mv_sf.adaptive_motion_search = 1;
864
865 sf->inter_sf.adaptive_rd_thresh = 2;
866 sf->inter_sf.mv_cost_upd_level = 1;
867 // TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine
868 // it with cpi->sf.disable_wedge_search_var_thresh.
869 sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
870 sf->inter_sf.prune_comp_search_by_single_result = 2;
871 sf->inter_sf.selective_ref_frame = 4;
872
873 sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2;
874
875 sf->rd_sf.tx_domain_dist_level = 1;
876
877 sf->winner_mode_sf.tx_size_search_level = boosted ? 0 : 2;
878 }
879
880 if (speed >= 4) {
881 sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
882
883 sf->inter_sf.alt_ref_search_fp = 1;
884
885 sf->interp_sf.skip_sharp_interp_filter_search = 1;
886
887 sf->tx_sf.tx_type_search.fast_inter_tx_type_search = 1;
888 sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;
889 sf->tx_sf.use_intra_txb_hash = 0;
890
891 sf->rd_sf.use_mb_rd_hash = 0;
892
893 sf->winner_mode_sf.tx_size_search_level = frame_is_intra_only(cm) ? 0 : 2;
894 }
895
896 if (speed >= 5) {
897 sf->inter_sf.adaptive_rd_thresh = 4;
898
899 sf->rd_sf.tx_domain_dist_level = 2;
900 sf->rd_sf.tx_domain_dist_thres_level = 2;
901 sf->winner_mode_sf.tx_size_search_level = 1;
902
903 sf->rt_sf.mode_search_skip_flags =
904 (cm->current_frame.frame_type == KEY_FRAME)
905 ? 0
906 : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
907 FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR |
908 FLAG_EARLY_TERMINATE;
909 sf->hl_sf.frame_parameter_update = 0;
910
911 sf->part_sf.default_max_partition_size = BLOCK_128X128;
912 sf->part_sf.default_min_partition_size = BLOCK_8X8;
913 sf->part_sf.max_intra_bsize = BLOCK_32X32;
914 sf->part_sf.partition_search_breakout_rate_thr = 500;
915 sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
916 sf->part_sf.adjust_var_based_rd_partitioning = 2;
917
918 sf->mv_sf.search_method = FAST_DIAMOND;
919 sf->mv_sf.subpel_force_stop = QUARTER_PEL;
920 sf->mv_sf.use_fullpel_costlist = 1;
921 sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
922
923 sf->inter_sf.inter_mode_rd_model_estimation = 2;
924
925 for (int i = 0; i < TX_SIZES; ++i) {
926 sf->intra_sf.intra_y_mode_mask[i] = INTRA_DC;
927 sf->intra_sf.intra_uv_mode_mask[i] = UV_INTRA_DC_CFL;
928 }
929
930 sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;
931 sf->tx_sf.use_inter_txb_hash = 0;
932 sf->tx_sf.refine_fast_tx_search_results = 0;
933
934 sf->rd_sf.optimize_coefficients = NO_TRELLIS_OPT;
935 sf->rd_sf.simple_model_rd_from_var = 1;
936
937 sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4;
938 sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;
939
940 sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
941 sf->rt_sf.num_inter_modes_for_tx_search = 5;
942 sf->rt_sf.skip_interp_filter_search = 1;
943 sf->rt_sf.use_comp_ref_nonrd = 0;
944 sf->rt_sf.use_real_time_ref_set = 1;
945 sf->rt_sf.use_simple_rd_model = 1;
946
947 sf->rt_sf.check_scene_detection = 1;
948 if (cm->current_frame.frame_type != KEY_FRAME &&
949 cpi->oxcf.rc_cfg.mode == AOM_CBR)
950 sf->rt_sf.overshoot_detection_cbr = FAST_DETECTION_MAXQ;
951 // Enable noise estimation only for high resolutions for now.
952 if (cm->width * cm->height > 640 * 480)
953 sf->rt_sf.use_temporal_noise_estimate = 1;
954 }
955
956 if (speed >= 6) {
957 sf->part_sf.adjust_var_based_rd_partitioning = 1;
958 sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
959 }
960
961 if (speed >= 7) {
962 sf->part_sf.default_max_partition_size = BLOCK_128X128;
963 sf->part_sf.default_min_partition_size = BLOCK_8X8;
964 sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
965
966 sf->mv_sf.search_method = FAST_DIAMOND;
967 sf->mv_sf.subpel_force_stop = QUARTER_PEL;
968 sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
969
970 sf->inter_sf.inter_mode_rd_model_estimation = 2;
971
972 sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;
973
974 sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
975 sf->rt_sf.nonrd_prune_ref_frame_search = 1;
976 sf->rt_sf.reuse_inter_pred_nonrd = 0;
977 sf->rt_sf.short_circuit_low_temp_var = 0;
978 sf->rt_sf.skip_interp_filter_search = 0;
979 sf->rt_sf.use_comp_ref_nonrd = 0;
980 // For spatial layers, only LAST and GOLDEN are currently used in the SVC
981 // for nonrd. The flag use_nonrd_altref_frame can disable GOLDEN in the
982 // get_ref_frame_flags() for some patterns, so disable it here for
983 // spatial layers.
984 sf->rt_sf.use_nonrd_altref_frame =
985 (cpi->svc.number_spatial_layers > 1) ? 0 : 1;
986 sf->rt_sf.use_nonrd_pick_mode = 1;
987 sf->rt_sf.nonrd_check_partition_merge_mode = 1;
988 sf->rt_sf.nonrd_check_partition_split = 0;
989 sf->rt_sf.hybrid_intra_pickmode = 1;
990 sf->rt_sf.skip_intra_pred_if_tx_skip = 1;
991 // For SVC: use better mv search on base temporal layer, and only
992 // on base spatial layer if highest resolution is above 640x360.
993 if (cpi->svc.number_temporal_layers > 1) {
994 if (cpi->svc.temporal_layer_id == 0 &&
995 (cpi->svc.spatial_layer_id == 0 ||
996 cpi->oxcf.frm_dim_cfg.width * cpi->oxcf.frm_dim_cfg.height <=
997 640 * 360)) {
998 sf->mv_sf.search_method = NSTEP;
999 sf->mv_sf.subpel_search_method = SUBPEL_TREE;
1000 sf->rt_sf.fullpel_search_step_param = 6;
1001 } else if (cpi->svc.non_reference_frame) {
1002 sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
1003 sf->rt_sf.fullpel_search_step_param = 10;
1004 }
1005 }
1006 // TODO(marpan): Look into why enabling skip_loopfilter_non_reference is
1007 // not bitexact on rtc testset, its very close (< ~0.01 bdrate), but not
1008 // always bitexact.
1009 if (cpi->use_svc && cpi->svc.non_reference_frame &&
1010 sf->lpf_sf.cdef_pick_method == CDEF_PICK_FROM_Q &&
1011 sf->lpf_sf.lpf_pick == LPF_PICK_FROM_Q)
1012 sf->rt_sf.skip_loopfilter_non_reference = 1;
1013 // Set mask for intra modes.
1014 for (int i = 0; i < BLOCK_SIZES; ++i)
1015 if (i >= BLOCK_32X32)
1016 sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
1017 else
1018 // Use DC, H, V intra mode for block sizes < 32X32.
1019 sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V;
1020 }
1021
1022 if (speed >= 8) {
1023 sf->rt_sf.estimate_motion_for_var_based_partition = 1;
1024 sf->rt_sf.short_circuit_low_temp_var = 1;
1025 #if !CONFIG_AV1_TEMPORAL_DENOISING
1026 sf->rt_sf.reuse_inter_pred_nonrd = 1;
1027 #endif
1028 sf->rt_sf.use_nonrd_altref_frame = 0;
1029 sf->rt_sf.nonrd_prune_ref_frame_search = 2;
1030 sf->rt_sf.nonrd_check_partition_merge_mode = 0;
1031 sf->rt_sf.nonrd_check_partition_split = 0;
1032 sf->rt_sf.use_modeled_non_rd_cost = 1;
1033 sf->rt_sf.source_metrics_sb_nonrd = 1;
1034 sf->rt_sf.skip_intra_pred_if_tx_skip = 0;
1035 sf->interp_sf.cb_pred_filter_search = 1;
1036 }
1037 if (speed >= 9) {
1038 sf->rt_sf.estimate_motion_for_var_based_partition = 0;
1039 sf->rt_sf.force_large_partition_blocks = 1;
1040 for (int i = 0; i < BLOCK_SIZES; ++i)
1041 sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
1042 }
1043 }
1044
init_hl_sf(HIGH_LEVEL_SPEED_FEATURES * hl_sf)1045 static AOM_INLINE void init_hl_sf(HIGH_LEVEL_SPEED_FEATURES *hl_sf) {
1046 // best quality defaults
1047 hl_sf->frame_parameter_update = 1;
1048 hl_sf->recode_loop = ALLOW_RECODE;
1049 // Recode loop tolerance %.
1050 hl_sf->recode_tolerance = 25;
1051 hl_sf->high_precision_mv_usage = CURRENT_Q;
1052 hl_sf->superres_auto_search_type = SUPERRES_AUTO_ALL;
1053 hl_sf->disable_extra_sc_testing = 0;
1054 hl_sf->second_alt_ref_filtering = 1;
1055 }
1056
init_tpl_sf(TPL_SPEED_FEATURES * tpl_sf)1057 static AOM_INLINE void init_tpl_sf(TPL_SPEED_FEATURES *tpl_sf) {
1058 tpl_sf->disable_gop_length_decision = 0;
1059 tpl_sf->prune_intra_modes = 0;
1060 tpl_sf->prune_starting_mv = 0;
1061 tpl_sf->reduce_first_step_size = 0;
1062 tpl_sf->skip_alike_starting_mv = 0;
1063 tpl_sf->subpel_force_stop = EIGHTH_PEL;
1064 tpl_sf->search_method = NSTEP;
1065 tpl_sf->disable_filtered_key_tpl = 0;
1066 tpl_sf->prune_ref_frames_in_tpl = 0;
1067 tpl_sf->allow_compound_pred = 1;
1068 }
1069
init_gm_sf(GLOBAL_MOTION_SPEED_FEATURES * gm_sf)1070 static AOM_INLINE void init_gm_sf(GLOBAL_MOTION_SPEED_FEATURES *gm_sf) {
1071 gm_sf->gm_search_type = GM_FULL_SEARCH;
1072 gm_sf->prune_ref_frame_for_gm_search = 0;
1073 }
1074
init_part_sf(PARTITION_SPEED_FEATURES * part_sf)1075 static AOM_INLINE void init_part_sf(PARTITION_SPEED_FEATURES *part_sf) {
1076 part_sf->partition_search_type = SEARCH_PARTITION;
1077 part_sf->less_rectangular_check_level = 0;
1078 part_sf->use_square_partition_only_threshold = BLOCK_128X128;
1079 part_sf->auto_max_partition_based_on_simple_motion = NOT_IN_USE;
1080 part_sf->default_max_partition_size = BLOCK_LARGEST;
1081 part_sf->default_min_partition_size = BLOCK_4X4;
1082 part_sf->adjust_var_based_rd_partitioning = 0;
1083 part_sf->allow_partition_search_skip = 0;
1084 part_sf->max_intra_bsize = BLOCK_LARGEST;
1085 // This setting only takes effect when partition_search_type is set
1086 // to FIXED_PARTITION.
1087 part_sf->fixed_partition_size = BLOCK_16X16;
1088 // Recode loop tolerance %.
1089 part_sf->partition_search_breakout_dist_thr = 0;
1090 part_sf->partition_search_breakout_rate_thr = 0;
1091 part_sf->prune_ext_partition_types_search_level = 0;
1092 part_sf->prune_part4_search = 0;
1093 part_sf->ml_prune_partition = 0;
1094 part_sf->ml_early_term_after_part_split_level = 0;
1095 for (int i = 0; i < PARTITION_BLOCK_SIZES; ++i) {
1096 part_sf->ml_partition_search_breakout_thresh[i] =
1097 -1; // -1 means not enabled.
1098 }
1099 part_sf->simple_motion_search_prune_agg = 0;
1100 part_sf->simple_motion_search_split = 0;
1101 part_sf->simple_motion_search_prune_rect = 0;
1102 part_sf->simple_motion_search_early_term_none = 0;
1103 part_sf->simple_motion_search_reduce_search_steps = 0;
1104 part_sf->intra_cnn_split = 0;
1105 part_sf->ext_partition_eval_thresh = BLOCK_8X8;
1106 part_sf->prune_ext_part_using_split_info = 0;
1107 part_sf->prune_rectangular_split_based_on_qidx = 0;
1108 part_sf->early_term_after_none_split = 0;
1109 part_sf->ml_predict_breakout_level = 0;
1110 part_sf->prune_sub_8x8_partition_level = 0;
1111 }
1112
init_mv_sf(MV_SPEED_FEATURES * mv_sf)1113 static AOM_INLINE void init_mv_sf(MV_SPEED_FEATURES *mv_sf) {
1114 mv_sf->full_pixel_search_level = 0;
1115 mv_sf->auto_mv_step_size = 0;
1116 mv_sf->exhaustive_searches_thresh = 0;
1117 mv_sf->obmc_full_pixel_search_level = 0;
1118 mv_sf->prune_mesh_search = 0;
1119 mv_sf->reduce_search_range = 0;
1120 mv_sf->search_method = NSTEP;
1121 mv_sf->simple_motion_subpel_force_stop = EIGHTH_PEL;
1122 mv_sf->subpel_force_stop = EIGHTH_PEL;
1123 mv_sf->subpel_iters_per_step = 2;
1124 mv_sf->subpel_search_method = SUBPEL_TREE;
1125 mv_sf->use_accurate_subpel_search = USE_8_TAPS;
1126 mv_sf->use_bsize_dependent_search_method = 0;
1127 mv_sf->use_fullpel_costlist = 0;
1128 mv_sf->use_downsampled_sad = 0;
1129 mv_sf->disable_extensive_joint_motion_search = 0;
1130 }
1131
init_inter_sf(INTER_MODE_SPEED_FEATURES * inter_sf)1132 static AOM_INLINE void init_inter_sf(INTER_MODE_SPEED_FEATURES *inter_sf) {
1133 inter_sf->comp_inter_joint_search_thresh = BLOCK_4X4;
1134 inter_sf->adaptive_rd_thresh = 0;
1135 inter_sf->model_based_post_interp_filter_breakout = 0;
1136 inter_sf->reduce_inter_modes = 0;
1137 inter_sf->alt_ref_search_fp = 0;
1138 inter_sf->selective_ref_frame = 0;
1139 inter_sf->prune_ref_frame_for_rect_partitions = 0;
1140 inter_sf->fast_wedge_sign_estimate = 0;
1141 inter_sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_ENABLED;
1142 inter_sf->reuse_inter_intra_mode = 0;
1143 inter_sf->mv_cost_upd_level = 0;
1144 inter_sf->prune_inter_modes_based_on_tpl = 0;
1145 inter_sf->prune_nearmv_using_neighbors = 0;
1146 inter_sf->prune_comp_search_by_single_result = 0;
1147 inter_sf->skip_repeated_ref_mv = 0;
1148 inter_sf->skip_repeated_newmv = 0;
1149 inter_sf->skip_repeated_full_newmv = 0;
1150 inter_sf->inter_mode_rd_model_estimation = 0;
1151 inter_sf->prune_compound_using_single_ref = 0;
1152 inter_sf->prune_compound_using_neighbors = 0;
1153 inter_sf->prune_comp_using_best_single_mode_ref = 0;
1154 inter_sf->disable_onesided_comp = 0;
1155 inter_sf->prune_mode_search_simple_translation = 0;
1156 inter_sf->prune_comp_type_by_comp_avg = 0;
1157 inter_sf->disable_interinter_wedge_newmv_search = 0;
1158 inter_sf->fast_interintra_wedge_search = 0;
1159 inter_sf->prune_comp_type_by_model_rd = 0;
1160 inter_sf->perform_best_rd_based_gating_for_chroma = 0;
1161 inter_sf->prune_obmc_prob_thresh = 0;
1162 inter_sf->disable_interinter_wedge_var_thresh = 0;
1163 inter_sf->disable_interintra_wedge_var_thresh = 0;
1164 inter_sf->prune_ref_mv_idx_search = 0;
1165 inter_sf->prune_warped_prob_thresh = 0;
1166 inter_sf->reuse_compound_type_decision = 0;
1167 inter_sf->txfm_rd_gate_level = 0;
1168 inter_sf->prune_inter_modes_if_skippable = 0;
1169 inter_sf->disable_masked_comp = 0;
1170 inter_sf->reuse_best_prediction_for_part_ab = 0;
1171 inter_sf->enable_fast_compound_mode_search = 0;
1172 }
1173
init_interp_sf(INTERP_FILTER_SPEED_FEATURES * interp_sf)1174 static AOM_INLINE void init_interp_sf(INTERP_FILTER_SPEED_FEATURES *interp_sf) {
1175 interp_sf->adaptive_interp_filter_search = 0;
1176 interp_sf->cb_pred_filter_search = 0;
1177 interp_sf->disable_dual_filter = 0;
1178 interp_sf->skip_sharp_interp_filter_search = 0;
1179 interp_sf->use_fast_interpolation_filter_search = 0;
1180 interp_sf->use_interp_filter = 0;
1181 }
1182
init_intra_sf(INTRA_MODE_SPEED_FEATURES * intra_sf)1183 static AOM_INLINE void init_intra_sf(INTRA_MODE_SPEED_FEATURES *intra_sf) {
1184 intra_sf->chroma_intra_pruning_with_hog = 0;
1185 intra_sf->skip_intra_in_interframe = 1;
1186 intra_sf->intra_pruning_with_hog = 0;
1187 intra_sf->prune_palette_search_level = 0;
1188
1189 for (int i = 0; i < TX_SIZES; i++) {
1190 intra_sf->intra_y_mode_mask[i] = INTRA_ALL;
1191 intra_sf->intra_uv_mode_mask[i] = UV_INTRA_ALL;
1192 }
1193 intra_sf->disable_smooth_intra = 0;
1194 }
1195
init_tx_sf(TX_SPEED_FEATURES * tx_sf)1196 static AOM_INLINE void init_tx_sf(TX_SPEED_FEATURES *tx_sf) {
1197 tx_sf->inter_tx_size_search_init_depth_sqr = 0;
1198 tx_sf->inter_tx_size_search_init_depth_rect = 0;
1199 tx_sf->intra_tx_size_search_init_depth_rect = 0;
1200 tx_sf->intra_tx_size_search_init_depth_sqr = 0;
1201 tx_sf->tx_size_search_lgr_block = 0;
1202 tx_sf->model_based_prune_tx_search_level = 0;
1203 tx_sf->tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_1;
1204 tx_sf->tx_type_search.ml_tx_split_thresh = 8500;
1205 tx_sf->tx_type_search.use_skip_flag_prediction = 1;
1206 tx_sf->tx_type_search.use_reduced_intra_txset = 0;
1207 tx_sf->tx_type_search.fast_intra_tx_type_search = 0;
1208 tx_sf->tx_type_search.fast_inter_tx_type_search = 0;
1209 tx_sf->tx_type_search.skip_tx_search = 0;
1210 tx_sf->tx_type_search.prune_tx_type_using_stats = 0;
1211 tx_sf->tx_type_search.prune_tx_type_est_rd = 0;
1212 tx_sf->tx_type_search.winner_mode_tx_type_pruning = 0;
1213 tx_sf->txb_split_cap = 1;
1214 tx_sf->adaptive_txb_search_level = 0;
1215 tx_sf->use_intra_txb_hash = 0;
1216 tx_sf->use_inter_txb_hash = 1;
1217 tx_sf->refine_fast_tx_search_results = 1;
1218 tx_sf->prune_tx_size_level = 0;
1219 }
1220
init_rd_sf(RD_CALC_SPEED_FEATURES * rd_sf,const AV1EncoderConfig * oxcf)1221 static AOM_INLINE void init_rd_sf(RD_CALC_SPEED_FEATURES *rd_sf,
1222 const AV1EncoderConfig *oxcf) {
1223 const int disable_trellis_quant = oxcf->algo_cfg.disable_trellis_quant;
1224 if (disable_trellis_quant == 3) {
1225 rd_sf->optimize_coefficients = !is_lossless_requested(&oxcf->rc_cfg)
1226 ? NO_ESTIMATE_YRD_TRELLIS_OPT
1227 : NO_TRELLIS_OPT;
1228 } else if (disable_trellis_quant == 2) {
1229 rd_sf->optimize_coefficients = !is_lossless_requested(&oxcf->rc_cfg)
1230 ? FINAL_PASS_TRELLIS_OPT
1231 : NO_TRELLIS_OPT;
1232 } else if (disable_trellis_quant == 0) {
1233 if (is_lossless_requested(&oxcf->rc_cfg)) {
1234 rd_sf->optimize_coefficients = NO_TRELLIS_OPT;
1235 } else {
1236 rd_sf->optimize_coefficients = FULL_TRELLIS_OPT;
1237 }
1238 } else if (disable_trellis_quant == 1) {
1239 rd_sf->optimize_coefficients = NO_TRELLIS_OPT;
1240 } else {
1241 assert(0 && "Invalid disable_trellis_quant value");
1242 }
1243 rd_sf->use_mb_rd_hash = 1;
1244 rd_sf->simple_model_rd_from_var = 0;
1245 rd_sf->tx_domain_dist_level = 0;
1246 rd_sf->tx_domain_dist_thres_level = 0;
1247 rd_sf->perform_coeff_opt = 0;
1248 }
1249
init_winner_mode_sf(WINNER_MODE_SPEED_FEATURES * winner_mode_sf)1250 static AOM_INLINE void init_winner_mode_sf(
1251 WINNER_MODE_SPEED_FEATURES *winner_mode_sf) {
1252 winner_mode_sf->motion_mode_for_winner_cand = 0;
1253 // Set this at the appropriate speed levels
1254 winner_mode_sf->tx_size_search_level = USE_FULL_RD;
1255 winner_mode_sf->enable_winner_mode_for_coeff_opt = 0;
1256 winner_mode_sf->enable_winner_mode_for_tx_size_srch = 0;
1257 winner_mode_sf->enable_winner_mode_for_use_tx_domain_dist = 0;
1258 winner_mode_sf->multi_winner_mode_type = 0;
1259 winner_mode_sf->dc_blk_pred_level = 0;
1260 }
1261
init_lpf_sf(LOOP_FILTER_SPEED_FEATURES * lpf_sf)1262 static AOM_INLINE void init_lpf_sf(LOOP_FILTER_SPEED_FEATURES *lpf_sf) {
1263 lpf_sf->disable_loop_restoration_chroma = 0;
1264 lpf_sf->prune_wiener_based_on_src_var = 0;
1265 lpf_sf->prune_sgr_based_on_wiener = 0;
1266 lpf_sf->enable_sgr_ep_pruning = 0;
1267 lpf_sf->reduce_wiener_window_size = 0;
1268 lpf_sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE;
1269 lpf_sf->use_coarse_filter_level_search = 0;
1270 lpf_sf->cdef_pick_method = CDEF_FULL_SEARCH;
1271 // Set decoder side speed feature to use less dual sgr modes
1272 lpf_sf->dual_sgr_penalty_level = 0;
1273 lpf_sf->disable_lr_filter = 0;
1274 }
1275
init_rt_sf(REAL_TIME_SPEED_FEATURES * rt_sf)1276 static AOM_INLINE void init_rt_sf(REAL_TIME_SPEED_FEATURES *rt_sf) {
1277 rt_sf->mode_search_skip_flags = 0;
1278 rt_sf->skip_interp_filter_search = 0;
1279 rt_sf->force_tx_search_off = 0;
1280 rt_sf->num_inter_modes_for_tx_search = INT_MAX;
1281 rt_sf->use_simple_rd_model = 0;
1282 rt_sf->nonrd_check_partition_merge_mode = 0;
1283 rt_sf->nonrd_check_partition_split = 0;
1284 rt_sf->skip_intra_pred_if_tx_skip = 0;
1285 }
1286
// Applies the frame-size-dependent speed features for the configured encoder
// mode (GOOD or REALTIME; other modes get no mode-specific settings), then
// applies the adjustments that follow from them.
void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) {
  SPEED_FEATURES *const sf = &cpi->sf;
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
  const int is_good_mode = (oxcf->mode == GOOD);
  const int is_rt_mode = (oxcf->mode == REALTIME);

  if (is_good_mode) {
    set_good_speed_feature_framesize_dependent(cpi, sf, speed);
  } else if (is_rt_mode) {
    set_rt_speed_feature_framesize_dependent(cpi, sf, speed);
  }

  // While the sequence parameters are not locked, fold the masked-compound
  // speed feature into the sequence-level enable flag.
  if (!cpi->seq_params_locked) {
    cpi->common.seq_params.enable_masked_compound &=
        !sf->inter_sf.disable_masked_comp;
  }

  // This is only used in motion vector unit test.
  switch (oxcf->unit_test_cfg.motion_vector_unit_test) {
    case 1:
      cpi->mv_search_params.find_fractional_mv_step =
          av1_return_max_sub_pixel_mv;
      break;
    case 2:
      cpi->mv_search_params.find_fractional_mv_step =
          av1_return_min_sub_pixel_mv;
      break;
    default: break;
  }

  // With row-based multi-threading on more than one thread, cap
  // mv_cost_upd_level at 1 (row level update).
  if (oxcf->row_mt == 1 && oxcf->max_threads > 1 &&
      sf->inter_sf.mv_cost_upd_level > 1) {
    sf->inter_sf.mv_cost_upd_level = 1;
  }
}
1315
// Resets every speed feature group to its default, applies the
// frame-size-independent settings for the configured encoder mode, and then
// derives the dependent state (sequence flags, mesh patterns, sub-pel search
// function and the winner_mode_params tables) from the result.
void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) {
  SPEED_FEATURES *const sf = &cpi->sf;
  WinnerModeParams *const winner_mode_params = &cpi->winner_mode_params;
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
  MotionVectorSearchParams *const mv_search_params = &cpi->mv_search_params;

  // Start every feature group from its defaults.
  init_hl_sf(&sf->hl_sf);
  init_tpl_sf(&sf->tpl_sf);
  init_gm_sf(&sf->gm_sf);
  init_part_sf(&sf->part_sf);
  init_mv_sf(&sf->mv_sf);
  init_inter_sf(&sf->inter_sf);
  init_interp_sf(&sf->interp_sf);
  init_intra_sf(&sf->intra_sf);
  init_tx_sf(&sf->tx_sf);
  init_rd_sf(&sf->rd_sf, oxcf);
  init_winner_mode_sf(&sf->winner_mode_sf);
  init_lpf_sf(&sf->lpf_sf);
  init_rt_sf(&sf->rt_sf);

  // Mode-specific settings; modes other than GOOD/REALTIME keep the defaults.
  if (oxcf->mode == GOOD) {
    set_good_speed_features_framesize_independent(cpi, sf, speed);
  } else if (oxcf->mode == REALTIME) {
    set_rt_speed_features_framesize_independent(cpi, sf, speed);
  }

  // While the sequence parameters are not locked, fold the relevant speed
  // features into the sequence-level enable flags.
  if (!cpi->seq_params_locked) {
    cpi->common.seq_params.enable_dual_filter &=
        !sf->interp_sf.disable_dual_filter;
    cpi->common.seq_params.enable_restoration &= !sf->lpf_sf.disable_lr_filter;

    cpi->common.seq_params.enable_interintra_compound &=
        (sf->inter_sf.disable_interintra_wedge_var_thresh != UINT_MAX);
  }

  // TODO(any) Currently use_intra_txb_hash is enabled in speed 1,2 for
  // intra-only encoding (key_freq_max == 0). Experiment with this speed feature
  // by enabling for image encoding in speed 1 and 2.
  if (oxcf->kf_cfg.key_freq_max == 0 && speed >= 1 && speed <= 2) {
    sf->tx_sf.use_intra_txb_hash = 1;
  }

  // sf->part_sf.partition_search_breakout_dist_thr is set assuming max 64x64
  // blocks. Normalise this if the blocks are bigger.
  if (MAX_SB_SIZE_LOG2 > 6) {
    sf->part_sf.partition_search_breakout_dist_thr <<=
        2 * (MAX_SB_SIZE_LOG2 - 6);
  }

  // Populate the mesh patterns for exhaustive motion search, both the regular
  // one and the intraBC one. Though the intraBC mesh pattern is populated for
  // all frame types, it is used only for intra frames of screen contents.
  const int mesh_speed = AOMMIN(speed, MAX_MESH_SPEED);
  for (int step = 0; step < MAX_MESH_STEP; ++step) {
    sf->mv_sf.mesh_patterns[step].range =
        good_quality_mesh_patterns[mesh_speed][step].range;
    sf->mv_sf.mesh_patterns[step].interval =
        good_quality_mesh_patterns[mesh_speed][step].interval;

    sf->mv_sf.intrabc_mesh_patterns[step].range =
        intrabc_mesh_patterns[mesh_speed][step].range;
    sf->mv_sf.intrabc_mesh_patterns[step].interval =
        intrabc_mesh_patterns[mesh_speed][step].interval;
  }

  // Slow quant, dct and trellis not worthwhile for first pass
  // so make sure they are always turned off.
  if (is_stat_generation_stage(cpi)) {
    sf->rd_sf.optimize_coefficients = NO_TRELLIS_OPT;
  }

  // No recode for 1 pass.
  if (oxcf->pass == 0 && has_no_stats_stage(cpi)) {
    sf->hl_sf.recode_loop = DISALLOW_RECODE;
  }

  // Select the sub-pel search function matching the chosen search method;
  // any other method leaves the current function pointer untouched.
  switch (sf->mv_sf.subpel_search_method) {
    case SUBPEL_TREE:
      mv_search_params->find_fractional_mv_step = av1_find_best_sub_pixel_tree;
      break;
    case SUBPEL_TREE_PRUNED:
      mv_search_params->find_fractional_mv_step =
          av1_find_best_sub_pixel_tree_pruned;
      break;
    case SUBPEL_TREE_PRUNED_MORE:
      mv_search_params->find_fractional_mv_step =
          av1_find_best_sub_pixel_tree_pruned_more;
      break;
    default: break;
  }

  // This is only used in motion vector unit test.
  if (oxcf->unit_test_cfg.motion_vector_unit_test == 1) {
    mv_search_params->find_fractional_mv_step = av1_return_max_sub_pixel_mv;
  } else if (oxcf->unit_test_cfg.motion_vector_unit_test == 2) {
    mv_search_params->find_fractional_mv_step = av1_return_min_sub_pixel_mv;
  }

  // Copy the tables selected by the speed feature levels into
  // winner_mode_params. Each assert checks the index used by the memcpy that
  // follows it.

  // tx_domain_dist_thresholds is indexed by tx_domain_dist_thres_level.
  const int dist_thres_level = sf->rd_sf.tx_domain_dist_thres_level;
  assert(dist_thres_level >= 0 && dist_thres_level < 3);
  memcpy(winner_mode_params->tx_domain_dist_threshold,
         tx_domain_dist_thresholds[dist_thres_level],
         sizeof(winner_mode_params->tx_domain_dist_threshold));

  // tx_domain_dist_types is indexed by tx_domain_dist_level.
  const int dist_level = sf->rd_sf.tx_domain_dist_level;
  assert(dist_level >= 0 && dist_level < 3);
  memcpy(winner_mode_params->use_transform_domain_distortion,
         tx_domain_dist_types[dist_level],
         sizeof(winner_mode_params->use_transform_domain_distortion));

  // coeff_opt_thresholds is indexed by perform_coeff_opt.
  const int coeff_opt_level = sf->rd_sf.perform_coeff_opt;
  assert(coeff_opt_level >= 0 && coeff_opt_level < 9);
  memcpy(winner_mode_params->coeff_opt_thresholds,
         &coeff_opt_thresholds[coeff_opt_level],
         sizeof(winner_mode_params->coeff_opt_thresholds));

  // predict_skip_levels is indexed by use_skip_flag_prediction.
  const int skip_pred_level = sf->tx_sf.tx_type_search.use_skip_flag_prediction;
  assert(skip_pred_level >= 0 && skip_pred_level < 3);
  memcpy(winner_mode_params->skip_txfm_level,
         predict_skip_levels[skip_pred_level],
         sizeof(winner_mode_params->skip_txfm_level));

  // tx_size_search_methods is indexed by tx_size_search_level.
  const int tx_size_level = sf->winner_mode_sf.tx_size_search_level;
  assert(tx_size_level >= 0 && tx_size_level < 3);
  memcpy(winner_mode_params->tx_size_search_methods,
         tx_size_search_methods[tx_size_level],
         sizeof(winner_mode_params->tx_size_search_methods));

  // predict_dc_levels is indexed by dc_blk_pred_level (no range assert here).
  memcpy(winner_mode_params->predict_dc_level,
         predict_dc_levels[sf->winner_mode_sf.dc_blk_pred_level],
         sizeof(winner_mode_params->predict_dc_level));

  if (oxcf->row_mt == 1 && oxcf->max_threads > 1) {
    // Revert inter_mode_rd_model_estimation from type 1 to type 2.
    if (sf->inter_sf.inter_mode_rd_model_estimation == 1) {
      sf->inter_sf.inter_mode_rd_model_estimation = 2;
    }

    // Disable the speed feature 'prune_ref_frame_for_gm_search' to achieve
    // better parallelism when number of threads available are greater than or
    // equal to maximum number of reference frames allowed for global motion.
    if (sf->gm_sf.gm_search_type != GM_DISABLE_SEARCH &&
        oxcf->max_threads >=
            gm_available_reference_frames[sf->gm_sf.gm_search_type]) {
      sf->gm_sf.prune_ref_frame_for_gm_search = 0;
    }
  }
}
1461
1462 // Override some speed features based on qindex
av1_set_speed_features_qindex_dependent(AV1_COMP * cpi,int speed)1463 void av1_set_speed_features_qindex_dependent(AV1_COMP *cpi, int speed) {
1464 AV1_COMMON *const cm = &cpi->common;
1465 SPEED_FEATURES *const sf = &cpi->sf;
1466 WinnerModeParams *const winner_mode_params = &cpi->winner_mode_params;
1467 const int boosted = frame_is_boosted(cpi);
1468 const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
1469 const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080;
1470 const int is_arf2_bwd_type =
1471 cpi->gf_group.update_type[cpi->gf_group.index] == INTNL_ARF_UPDATE;
1472
1473 if (cpi->oxcf.mode == GOOD && speed == 0) {
1474 // qindex_thresh for resolution < 720p
1475 const int qindex_thresh = boosted ? 70 : (is_arf2_bwd_type ? 110 : 140);
1476 if (!is_720p_or_larger && cm->quant_params.base_qindex <= qindex_thresh) {
1477 sf->inter_sf.skip_repeated_newmv = 1;
1478 sf->part_sf.simple_motion_search_split =
1479 cm->features.allow_screen_content_tools ? 1 : 2;
1480 sf->part_sf.simple_motion_search_early_term_none = 1;
1481 sf->tx_sf.model_based_prune_tx_search_level = 0;
1482 }
1483
1484 if (is_720p_or_larger && cm->quant_params.base_qindex <= 128) {
1485 sf->rd_sf.perform_coeff_opt = 2 + is_1080p_or_larger;
1486 memcpy(winner_mode_params->coeff_opt_thresholds,
1487 &coeff_opt_thresholds[sf->rd_sf.perform_coeff_opt],
1488 sizeof(winner_mode_params->coeff_opt_thresholds));
1489 sf->part_sf.simple_motion_search_split =
1490 cm->features.allow_screen_content_tools ? 1 : 2;
1491 sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
1492 sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
1493 sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
1494 sf->inter_sf.skip_repeated_newmv = 1;
1495 sf->tx_sf.model_based_prune_tx_search_level = 0;
1496
1497 if (is_1080p_or_larger && cm->quant_params.base_qindex <= 108) {
1498 sf->inter_sf.selective_ref_frame = 2;
1499 sf->rd_sf.tx_domain_dist_level = boosted ? 1 : 2;
1500 sf->rd_sf.tx_domain_dist_thres_level = 1;
1501 sf->part_sf.simple_motion_search_early_term_none = 1;
1502 sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
1503 sf->interp_sf.cb_pred_filter_search = 0;
1504 sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2;
1505 sf->tx_sf.tx_type_search.skip_tx_search = 1;
1506 sf->tx_sf.use_intra_txb_hash = 1;
1507 }
1508 }
1509 }
1510
1511 if (cpi->oxcf.mode == GOOD && speed >= 3) {
1512 // Disable extended partitions for lower quantizers
1513 const int qindex_thresh =
1514 cm->features.allow_screen_content_tools ? 50 : 100;
1515 if (cm->quant_params.base_qindex <= qindex_thresh && !boosted) {
1516 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
1517 }
1518 }
1519
1520 if (cpi->oxcf.mode == GOOD && speed >= 4) {
1521 // Disable extended partitions for lower quantizers
1522 const int qindex_thresh = boosted ? 80 : 120;
1523 if (cm->quant_params.base_qindex <= qindex_thresh &&
1524 !frame_is_intra_only(&cpi->common)) {
1525 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
1526 }
1527 }
1528
1529 if (cpi->oxcf.mode == GOOD && speed >= 5) {
1530 const int qindex_thresh = boosted ? 100 : 160;
1531 if (cm->quant_params.base_qindex <= qindex_thresh &&
1532 !frame_is_intra_only(&cpi->common)) {
1533 sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
1534 }
1535 }
1536
1537 if (cpi->oxcf.mode == GOOD && (speed <= 2)) {
1538 if (!is_stat_generation_stage(cpi)) {
1539 // Use faster full-pel motion search for high quantizers.
1540 // Also use reduced total search range for low resolutions at high
1541 // quantizers.
1542 const int aggr = speed;
1543 const int qindex_thresh1 = ms_qindex_thresh[aggr][is_720p_or_larger][0];
1544 const int qindex_thresh2 = ms_qindex_thresh[aggr][is_720p_or_larger][1];
1545 const SEARCH_METHODS search_method =
1546 motion_search_method[is_720p_or_larger];
1547 if (cm->quant_params.base_qindex > qindex_thresh1) {
1548 sf->mv_sf.search_method = search_method;
1549 sf->tpl_sf.search_method = search_method;
1550 } else if (cm->quant_params.base_qindex > qindex_thresh2) {
1551 sf->mv_sf.search_method = NSTEP_8PT;
1552 }
1553 }
1554 }
1555 }
1556