1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <limits.h>
13 
14 #include "av1/common/reconintra.h"
15 
16 #include "av1/encoder/encoder.h"
17 #include "av1/encoder/speed_features.h"
18 #include "av1/encoder/rdopt.h"
19 
20 #include "aom_dsp/aom_dsp_common.h"
21 
// Max speed setting for mesh motion method
#define MAX_MESH_SPEED 5
// Max speed setting for tx domain evaluation
#define MAX_TX_DOMAIN_EVAL_SPEED 5
25 static MESH_PATTERN
26     good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
27       { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
28       { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
29       { { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } },
30       { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
31       { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
32       { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
33     };
34 
35 // TODO(huisu@google.com): These settings are pretty relaxed, tune them for
36 // each speed setting
37 static MESH_PATTERN intrabc_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
38   { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
39   { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
40   { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
41   { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
42   { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
43   { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
44 };
45 
46 // Threshold values to be used for pruning the txfm_domain_distortion
47 // based on block MSE
48 // Index 0: Default mode evaluation, Winner mode processing is not
49 // applicable (Eg : IntraBc). Index 1: Mode evaluation.
50 // Index 2: Winner mode evaluation. Index 1 and 2 are applicable when
51 // enable_winner_mode_for_use_tx_domain_dist speed feature is ON
52 // TODO(any): Experiment the threshold logic based on variance metric
53 static unsigned int tx_domain_dist_thresholds[3][MODE_EVAL_TYPES] = {
54   { UINT_MAX, UINT_MAX, UINT_MAX }, { 22026, 22026, 22026 }, { 0, 0, 0 }
55 };
56 
57 // Transform domain distortion type to be used for default, mode and winner mode
58 // evaluation Index 0: Default mode evaluation, Winner mode processing is not
59 // applicable (Eg : IntraBc). Index 1: Mode evaluation. Index 2: Winner mode
60 // evaluation. Index 1 and 2 are applicable when
61 // enable_winner_mode_for_use_tx_domain_dist speed feature is ON
62 static unsigned int tx_domain_dist_types[3][MODE_EVAL_TYPES] = { { 0, 2, 0 },
63                                                                  { 1, 2, 0 },
64                                                                  { 2, 2, 0 } };
65 
66 // Threshold values to be used for disabling coeff RD-optimization
67 // based on block MSE / qstep^2.
68 // TODO(any): Experiment the threshold logic based on variance metric.
69 // Table has satd and dist threshold value index 0 : dist,index 1: satd
70 // For each row, the indices are as follows.
71 // Index 0: Default mode evaluation, Winner mode processing is not applicable
72 // (Eg : IntraBc)
73 // Index 1: Mode evaluation.
74 // Index 2: Winner mode evaluation.
75 // Index 1 and 2 are applicable when enable_winner_mode_for_coeff_opt speed
76 // feature is ON
77 // There are 7 levels with increasing speed, mapping to vertical indices.
78 static unsigned int coeff_opt_thresholds[9][MODE_EVAL_TYPES][2] = {
79   { { UINT_MAX, UINT_MAX }, { UINT_MAX, UINT_MAX }, { UINT_MAX, UINT_MAX } },
80   { { 3200, UINT_MAX }, { 250, UINT_MAX }, { UINT_MAX, UINT_MAX } },
81   { { 1728, UINT_MAX }, { 142, UINT_MAX }, { UINT_MAX, UINT_MAX } },
82   { { 864, UINT_MAX }, { 142, UINT_MAX }, { UINT_MAX, UINT_MAX } },
83   { { 432, UINT_MAX }, { 86, UINT_MAX }, { UINT_MAX, UINT_MAX } },
84   { { 864, 97 }, { 142, 16 }, { UINT_MAX, UINT_MAX } },
85   { { 432, 97 }, { 86, 16 }, { UINT_MAX, UINT_MAX } },
86   { { 216, 25 }, { 86, 10 }, { UINT_MAX, UINT_MAX } },
87   { { 216, 25 }, { 0, 10 }, { UINT_MAX, UINT_MAX } }
88 };
89 
90 // Transform size to be used for default, mode and winner mode evaluation
91 // Index 0: Default mode evaluation, Winner mode processing is not applicable
92 // (Eg : IntraBc) Index 1: Mode evaluation. Index 2: Winner mode evaluation.
93 // Index 1 and 2 are applicable when enable_winner_mode_for_tx_size_srch speed
94 // feature is ON
95 static TX_SIZE_SEARCH_METHOD tx_size_search_methods[3][MODE_EVAL_TYPES] = {
96   { USE_FULL_RD, USE_LARGESTALL, USE_FULL_RD },
97   { USE_FAST_RD, USE_LARGESTALL, USE_FULL_RD },
98   { USE_LARGESTALL, USE_LARGESTALL, USE_FULL_RD }
99 };
100 
101 // Predict transform skip levels to be used for default, mode and winner mode
102 // evaluation. Index 0: Default mode evaluation, Winner mode processing is not
103 // applicable. Index 1: Mode evaluation, Index 2: Winner mode evaluation
104 // Values indicate the aggressiveness of skip flag prediction.
105 // 0 : no early skip prediction
106 // 1 : conservative early skip prediction using DCT_DCT
107 // 2 : early skip prediction based on SSE
108 static unsigned int predict_skip_levels[3][MODE_EVAL_TYPES] = { { 0, 0, 0 },
109                                                                 { 1, 1, 1 },
110                                                                 { 1, 2, 1 } };
111 
112 // Predict DC block levels to be used for default, mode and winner mode
113 // evaluation. Index 0: Default mode evaluation, Winner mode processing is not
114 // applicable. Index 1: Mode evaluation, Index 2: Winner mode evaluation
115 // Values indicate the aggressiveness of skip flag prediction.
116 // 0 : no early DC block prediction
117 // 1 : Early DC block prediction based on error variance
118 static unsigned int predict_dc_levels[3][MODE_EVAL_TYPES] = { { 0, 0, 0 },
119                                                               { 1, 1, 0 },
120                                                               { 1, 1, 1 } };
121 
122 // This table holds the maximum number of reference frames for global motion.
123 // The table is indexed as per the speed feature 'gm_search_type'.
124 // 0 : All reference frames are allowed.
125 // 1 : All reference frames except L2 and L3 are allowed.
126 // 2 : All reference frames except L2, L3 and ARF2 are allowed.
127 // 3 : No reference frame is allowed.
128 static int gm_available_reference_frames[GM_DISABLE_SEARCH + 1] = {
129   INTER_REFS_PER_FRAME, INTER_REFS_PER_FRAME - 2, INTER_REFS_PER_FRAME - 3, 0
130 };
131 
132 // Qindex threshold levels used for selecting full-pel motion search.
133 // ms_qthresh[i][j][k] indicates the qindex boundary value for 'k'th qindex band
134 // for resolution index 'j' for aggressiveness level 'i'.
135 // Aggressiveness increases from i = 0 to 2.
136 // j = 0: lower than 720p resolution, j = 1: 720p or larger resolution.
137 // Currently invoked only for speed 0, 1 and 2.
138 static int ms_qindex_thresh[3][2][2] = { { { 200, 70 }, { MAXQ, 200 } },
139                                          { { 170, 50 }, { MAXQ, 200 } },
140                                          { { 170, 40 }, { 200, 40 } } };
141 
142 // Full-pel search methods for aggressive search based on qindex.
143 // Index 0 is for resolutions lower than 720p, index 1 for 720p or larger
144 // resolutions. Currently invoked only for speed 1 and 2.
145 static SEARCH_METHODS motion_search_method[2] = { CLAMPED_DIAMOND, DIAMOND };
146 
147 // Intra only frames, golden frames (except alt ref overlays) and
148 // alt ref frames tend to be coded at a higher than ambient quality
frame_is_boosted(const AV1_COMP * cpi)149 static int frame_is_boosted(const AV1_COMP *cpi) {
150   return frame_is_kf_gf_arf(cpi);
151 }
152 
// Sets the speed features whose values depend on the frame resolution.
//
// cpi   : encoder instance; the frame width/height and the high bitdepth
//         setting are read from it.
// sf    : speed feature structure that is updated in place.
// speed : speed setting. Tiers are cumulative: every tier up to 'speed' is
//         applied in increasing order, so a higher tier may override a value
//         chosen by a lower one.
//
// Resolution classes are derived from the smaller of the frame width and
// height.
static void set_good_speed_feature_framesize_dependent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  const int min_dim = AOMMIN(cm->width, cm->height);
  const int is_480p_or_larger = min_dim >= 480;
  const int is_720p_or_larger = min_dim >= 720;
  const int is_1080p_or_larger = min_dim >= 1080;
  const int is_4k_or_larger = min_dim >= 2160;
  const bool high_bitdepth = cpi->oxcf.use_highbitdepth;

  // ---- Speed 0 (always applied) ----
  if (is_480p_or_larger) {
    sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128;
    sf->part_sf.auto_max_partition_based_on_simple_motion =
        is_720p_or_larger ? ADAPT_PRED : RELAXED_PRED;
  } else {
    sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
    sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED;
    if (high_bitdepth) sf->tx_sf.prune_tx_size_level = 1;
  }

  if (is_4k_or_larger) sf->part_sf.default_min_partition_size = BLOCK_8X8;

  if (is_720p_or_larger) {
    // TODO(chiyotsai@google.com): make this speed feature adaptive based on
    // current block's vertical texture instead of hardcoded with resolution
    sf->mv_sf.use_downsampled_sad = 1;
  } else {
    // TODO(huisu@google.com): train models for 720P and above.
    sf->part_sf.ml_partition_search_breakout_thresh[0] = 200;  // BLOCK_8X8
    sf->part_sf.ml_partition_search_breakout_thresh[1] = 250;  // BLOCK_16X16
    sf->part_sf.ml_partition_search_breakout_thresh[2] = 300;  // BLOCK_32X32
    sf->part_sf.ml_partition_search_breakout_thresh[3] = 500;  // BLOCK_64X64
    sf->part_sf.ml_partition_search_breakout_thresh[4] = -1;   // BLOCK_128X128
    sf->part_sf.ml_early_term_after_part_split_level = 1;
  }

  if (speed < 1) return;

  // ---- Speed 1 ----
  sf->part_sf.use_square_partition_only_threshold =
      is_720p_or_larger ? BLOCK_128X128
                        : (is_480p_or_larger ? BLOCK_64X64 : BLOCK_32X32);

  if (!is_720p_or_larger) {
    sf->part_sf.ml_partition_search_breakout_thresh[0] = 200;  // BLOCK_8X8
    sf->part_sf.ml_partition_search_breakout_thresh[1] = 250;  // BLOCK_16X16
    sf->part_sf.ml_partition_search_breakout_thresh[2] = 300;  // BLOCK_32X32
    sf->part_sf.ml_partition_search_breakout_thresh[3] = 300;  // BLOCK_64X64
    sf->part_sf.ml_partition_search_breakout_thresh[4] = -1;  // BLOCK_128X128
  }
  sf->part_sf.ml_early_term_after_part_split_level = 2;

  if (speed < 2) return;

  // ---- Speed 2 ----
  if (is_720p_or_larger) {
    sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
    sf->part_sf.partition_search_breakout_dist_thr = (1 << 24);
    sf->part_sf.partition_search_breakout_rate_thr = 120;
    sf->inter_sf.prune_obmc_prob_thresh = 16;
  } else {
    // Both the 480p and the sub-480p classes use BLOCK_32X32 here.
    sf->part_sf.use_square_partition_only_threshold = BLOCK_32X32;
    sf->part_sf.partition_search_breakout_dist_thr = (1 << 22);
    sf->part_sf.partition_search_breakout_rate_thr = 100;
    sf->inter_sf.prune_obmc_prob_thresh = 8;
  }

  if (is_480p_or_larger) sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 1;
  if (high_bitdepth) {
    sf->tx_sf.prune_tx_size_level = is_480p_or_larger ? 2 : 3;
  }

  if (speed < 3) return;

  // ---- Speed 3 ----
  sf->part_sf.ml_early_term_after_part_split_level = 0;

  if (is_720p_or_larger) {
    sf->part_sf.partition_search_breakout_dist_thr = (1 << 25);
    sf->part_sf.partition_search_breakout_rate_thr = 200;
  } else {
    sf->part_sf.max_intra_bsize = BLOCK_32X32;
    sf->part_sf.partition_search_breakout_dist_thr = (1 << 23);
    sf->part_sf.partition_search_breakout_rate_thr = 120;
  }
  if (high_bitdepth) sf->tx_sf.prune_tx_size_level = 3;

  if (speed < 4) return;

  // ---- Speed 4 ----
  sf->part_sf.partition_search_breakout_dist_thr =
      is_720p_or_larger ? (1 << 26) : (1 << 24);

  if (is_480p_or_larger) sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 2;

  sf->inter_sf.prune_obmc_prob_thresh = INT_MAX;

  if (speed < 5) return;

  // ---- Speed 5 ----
  // Frames below 480p keep whatever value was set before this function.
  if (is_720p_or_larger) {
    sf->inter_sf.prune_warped_prob_thresh = 16;
  } else if (is_480p_or_larger) {
    sf->inter_sf.prune_warped_prob_thresh = 8;
  }

  if (speed < 6) return;

  // ---- Speed 6 ----
  if (is_720p_or_larger) {
    sf->part_sf.auto_max_partition_based_on_simple_motion = NOT_IN_USE;
    sf->inter_sf.disable_masked_comp = 1;
    // TODO(yunqing): use BLOCK_32X32 for >= 4k.
    sf->part_sf.use_square_partition_only_threshold =
        is_4k_or_larger ? BLOCK_64X64 : BLOCK_32X32;
    sf->inter_sf.prune_ref_mv_idx_search = 2;
  } else {
    if (is_480p_or_larger) {
      sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED;
    }
    sf->inter_sf.mv_cost_upd_level = 2;
    sf->part_sf.use_square_partition_only_threshold = BLOCK_16X16;
    sf->inter_sf.prune_ref_mv_idx_search = 1;
  }

  if (is_1080p_or_larger) sf->part_sf.default_min_partition_size = BLOCK_8X8;
}
315 
// Sets the real-time speed features whose values depend on the frame
// resolution.
//
// cpi   : encoder instance; the frame width/height and use_svc are read.
// sf    : speed feature structure that is updated in place.
// speed : speed setting that selects which overrides are applied.
//
// Resolution classes are derived from the smaller of the frame width and
// height.
static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
                                                     SPEED_FEATURES *const sf,
                                                     int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  const int min_dim = AOMMIN(cm->width, cm->height);
  const int is_360p_or_larger = min_dim >= 360;
  const int is_480p_or_larger = min_dim >= 480;
  const int is_720p_or_larger = min_dim >= 720;

  (void)is_720p_or_larger;  // Not used so far

  if (is_360p_or_larger) {
    if (speed == 8 && !cpi->use_svc) {
      sf->rt_sf.short_circuit_low_temp_var = 0;
      sf->rt_sf.use_nonrd_altref_frame = 1;
    }
  } else {
    if (speed >= 6) sf->rt_sf.force_tx_search_off = 1;
    if (speed >= 8) {
      // Off at speed 8, back on from speed 9 onwards.
      sf->rt_sf.use_modeled_non_rd_cost = (speed >= 9) ? 1 : 0;
      sf->rt_sf.use_nonrd_filter_search = 0;
    }
    if (speed >= 9) {
      sf->rt_sf.nonrd_agressive_skip = 1;
// TODO(kyslov) Re-enable when AV1 models are trained
#if 0
#if CONFIG_RT_ML_PARTITIONING
      if (!frame_is_intra_only(cm)) {
        sf->part_sf.partition_search_type = ML_BASED_PARTITION;
        sf->rt_sf.reuse_inter_pred_nonrd = 0;
      }
#endif
#endif
    }
  }

  // The remaining overrides only apply below 480p.
  if (is_480p_or_larger) return;

  if (speed == 7) {
    sf->rt_sf.nonrd_check_partition_merge_mode = 2;
  }
  if (speed >= 9) {
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
    sf->rt_sf.estimate_motion_for_var_based_partition = 0;
  } else if (speed >= 8) {
    sf->mv_sf.subpel_search_method = SUBPEL_TREE;
    sf->rt_sf.estimate_motion_for_var_based_partition = 1;
  }
}
365 
// Sets the speed features that do not depend on the frame resolution.
//
// cpi   : encoder instance (read only).
// sf    : speed feature structure that is updated in place.
// speed : speed setting. The speed 0 settings are always applied; each
//         'speed >= N' tier is then applied in increasing order, so a higher
//         tier may override a value chosen by a lower one.
static void set_good_speed_features_framesize_independent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  const GF_GROUP *const gf_group = &cpi->gf_group;
  const int is_boosted = frame_is_boosted(cpi);
  // Non-zero for boosted frames and for INTNL_ARF_UPDATE frames.
  const int boosted_or_intnl_arf =
      is_boosted ||
      gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE;
  const int sc_tools = cm->features.allow_screen_content_tools;
  const int high_bitdepth = cpi->oxcf.use_highbitdepth;
  const int intra_only = frame_is_intra_only(cm);

  if (!cpi->oxcf.tile_cfg.enable_large_scale_tile) {
    sf->hl_sf.high_precision_mv_usage = LAST_MV_DATA;
  }

  // ---- Speed 0 ----
  // Speed 0 for all speed features that give neutral coding performance
  // change.
  sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3;

  sf->part_sf.less_rectangular_check_level = 1;
  sf->part_sf.ml_prune_partition = 1;
  sf->part_sf.prune_ext_partition_types_search_level = 1;
  sf->part_sf.prune_part4_search = 2;
  sf->part_sf.simple_motion_search_prune_rect = 1;
  sf->part_sf.ml_predict_breakout_level = high_bitdepth ? 1 : 3;

  // TODO(debargha): Test, tweak and turn on either 1 or 2
  sf->inter_sf.inter_mode_rd_model_estimation = 1;
  sf->inter_sf.model_based_post_interp_filter_breakout = 1;
  sf->inter_sf.prune_compound_using_single_ref = 1;
  sf->inter_sf.prune_mode_search_simple_translation = 1;
  if (is_boosted || sc_tools) {
    sf->inter_sf.prune_ref_frame_for_rect_partitions = 0;
  } else {
    sf->inter_sf.prune_ref_frame_for_rect_partitions =
        boosted_or_intnl_arf ? 1 : 2;
  }
  sf->inter_sf.reduce_inter_modes = is_boosted ? 1 : 2;
  sf->inter_sf.selective_ref_frame = 1;
  sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_SKIP_MV_SEARCH;

  sf->interp_sf.use_fast_interpolation_filter_search = 1;

  sf->intra_sf.intra_pruning_with_hog = 1;

  sf->tx_sf.adaptive_txb_search_level = 1;
  sf->tx_sf.intra_tx_size_search_init_depth_sqr = 1;
  sf->tx_sf.model_based_prune_tx_search_level = 1;
  sf->tx_sf.tx_type_search.use_reduced_intra_txset = 1;

  sf->tpl_sf.search_method = NSTEP_8PT;

  sf->rt_sf.use_nonrd_pick_mode = 0;
  sf->rt_sf.use_real_time_ref_set = 0;

  // Graphics/animation or screen content gets the lower threshold.
  sf->mv_sf.exhaustive_searches_thresh =
      (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION ||
       cpi->use_screen_content_tools)
          ? (1 << 20)
          : (1 << 25);

  sf->rd_sf.perform_coeff_opt = 1;
  sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_DUAL;

  // ---- Speed 1 ----
  if (speed >= 1) {
    sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2;
    sf->gm_sf.prune_ref_frame_for_gm_search = is_boosted ? 0 : 1;

    sf->part_sf.intra_cnn_split = 1;
    sf->part_sf.simple_motion_search_early_term_none = 1;
    // TODO(Venkat): Clean-up frame type dependency for
    // simple_motion_search_split in partition search function and set the
    // speed feature accordingly
    sf->part_sf.simple_motion_search_split = sc_tools ? 1 : 2;
    sf->part_sf.ml_predict_breakout_level = high_bitdepth ? 2 : 3;

    sf->mv_sf.exhaustive_searches_thresh <<= 1;
    sf->mv_sf.obmc_full_pixel_search_level = 1;
    sf->mv_sf.use_accurate_subpel_search = USE_4_TAPS;
    sf->mv_sf.disable_extensive_joint_motion_search = 1;

    sf->inter_sf.disable_interinter_wedge_newmv_search = is_boosted ? 0 : 1;
    sf->inter_sf.prune_comp_search_by_single_result = is_boosted ? 2 : 1;
    sf->inter_sf.prune_comp_type_by_comp_avg = 1;
    sf->inter_sf.prune_comp_type_by_model_rd = is_boosted ? 0 : 1;
    if (intra_only || sc_tools) {
      sf->inter_sf.prune_ref_frame_for_rect_partitions = 0;
    } else {
      sf->inter_sf.prune_ref_frame_for_rect_partitions = is_boosted ? 1 : 2;
    }
    sf->inter_sf.reduce_inter_modes = is_boosted ? 1 : 3;
    sf->inter_sf.reuse_inter_intra_mode = 1;
    sf->inter_sf.selective_ref_frame = 2;
    sf->inter_sf.skip_repeated_newmv = 1;

    sf->interp_sf.use_interp_filter = 1;

    sf->intra_sf.prune_palette_search_level = 1;

    sf->tx_sf.adaptive_txb_search_level = 2;
    sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
    sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
    sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
    sf->tx_sf.model_based_prune_tx_search_level = 0;
    sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2;
    sf->tx_sf.tx_type_search.skip_tx_search = 1;

    sf->rd_sf.perform_coeff_opt = is_boosted ? 2 : 3;
    sf->rd_sf.tx_domain_dist_level = is_boosted ? 1 : 2;
    sf->rd_sf.tx_domain_dist_thres_level = 1;

    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL1;
    sf->lpf_sf.dual_sgr_penalty_level = 1;
    sf->lpf_sf.enable_sgr_ep_pruning = 1;

    // TODO(any, yunqing): move this feature to speed 0.
    sf->tpl_sf.skip_alike_starting_mv = 1;
  }

  // ---- Speed 2 ----
  if (speed >= 2) {
    sf->part_sf.allow_partition_search_skip = 1;

    sf->mv_sf.auto_mv_step_size = 1;
    sf->mv_sf.subpel_iters_per_step = 1;

    // TODO(chiyotsai@google.com): We can get 10% speed up if we move
    // adaptive_rd_thresh to speed 1. But currently it performs poorly on some
    // clips (e.g. 5% loss on dinner_1080p). We need to examine the sequence a
    // bit more closely to figure out why.
    sf->inter_sf.adaptive_rd_thresh = 1;
    sf->inter_sf.comp_inter_joint_search_thresh = BLOCK_SIZES_ALL;
    sf->inter_sf.disable_interintra_wedge_var_thresh = 100;
    sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
    sf->inter_sf.fast_interintra_wedge_search = 1;
    sf->inter_sf.prune_comp_search_by_single_result = is_boosted ? 4 : 1;
    sf->inter_sf.prune_compound_using_neighbors = 1;
    sf->inter_sf.prune_comp_using_best_single_mode_ref = 2;
    sf->inter_sf.prune_comp_type_by_comp_avg = 2;
    sf->inter_sf.reuse_best_prediction_for_part_ab = 1;
    sf->inter_sf.selective_ref_frame = 3;
    sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;
    // Enable fast search only for COMPOUND_DIFFWTD type.
    sf->inter_sf.enable_fast_compound_mode_search = 1;

    // TODO(Sachin): Enable/Enhance this speed feature for speed 2 & 3
    sf->interp_sf.adaptive_interp_filter_search = 1;
    sf->interp_sf.disable_dual_filter = 1;

    sf->intra_sf.disable_smooth_intra =
        !intra_only || (cpi->rc.frames_to_key > 1);
    sf->intra_sf.intra_pruning_with_hog = 2;

    sf->rd_sf.perform_coeff_opt = boosted_or_intnl_arf ? 3 : 4;

    sf->lpf_sf.prune_wiener_based_on_src_var = 1;
    sf->lpf_sf.prune_sgr_based_on_wiener = 1;

    // TODO(any): Move this from speed 3 to speed 2 so that TPL multithread
    // is re-enabled at speed 2. This also makes encoder faster. After TPL MT is
    // fixed and works with compound pred, we can re-evaluate this feature.
    sf->tpl_sf.allow_compound_pred = 0;
    sf->tpl_sf.prune_ref_frames_in_tpl = 1;
  }

  // ---- Speed 3 ----
  if (speed >= 3) {
    sf->hl_sf.high_precision_mv_usage = CURRENT_Q;
    sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;

    sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH;

    sf->part_sf.less_rectangular_check_level = 2;
    sf->part_sf.simple_motion_search_prune_agg = 1;
    sf->part_sf.prune_ext_part_using_split_info = 1;

    // adaptive_motion_search breaks encoder multi-thread tests.
    // The values in x->pred_mv[] differ for single and multi-thread cases.
    // See aomedia:1778.
    // sf->mv_sf.adaptive_motion_search = 1;
    sf->mv_sf.full_pixel_search_level = 1;
    sf->mv_sf.simple_motion_subpel_force_stop = QUARTER_PEL;
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
    sf->mv_sf.search_method = DIAMOND;

    sf->inter_sf.mv_cost_upd_level = 1;
    // TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine
    // it with cpi->sf.disable_wedge_search_var_thresh.
    sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
    // TODO(any): Experiment with the early exit mechanism for speeds 0, 1 and 2
    // and clean-up the speed feature
    sf->inter_sf.perform_best_rd_based_gating_for_chroma = 1;
    sf->inter_sf.prune_inter_modes_based_on_tpl = is_boosted ? 0 : 1;
    sf->inter_sf.prune_comp_search_by_single_result = is_boosted ? 4 : 2;
    sf->inter_sf.selective_ref_frame = 5;
    sf->inter_sf.skip_repeated_ref_mv = 1;
    sf->inter_sf.skip_repeated_full_newmv = 1;
    sf->inter_sf.reuse_compound_type_decision = 1;
    if (is_boosted) {
      sf->inter_sf.txfm_rd_gate_level = 0;
    } else {
      sf->inter_sf.txfm_rd_gate_level = boosted_or_intnl_arf ? 1 : 2;
    }

    // TODO(chiyotsai@google.com): the thresholds chosen for intra hog are
    // inherited directly from luma hog with some minor tweaking. Eventually we
    // should run this with a bayesian optimizer to find the Pareto frontier.
    sf->intra_sf.chroma_intra_pruning_with_hog = 2;
    sf->intra_sf.intra_pruning_with_hog = 3;
    sf->intra_sf.prune_palette_search_level = 2;

    sf->tpl_sf.skip_alike_starting_mv = 2;
    sf->tpl_sf.prune_intra_modes = 1;
    sf->tpl_sf.prune_starting_mv = 1;
    sf->tpl_sf.reduce_first_step_size = 6;
    sf->tpl_sf.subpel_force_stop = QUARTER_PEL;
    sf->tpl_sf.search_method = DIAMOND;

    sf->tx_sf.adaptive_txb_search_level = is_boosted ? 2 : 3;
    sf->tx_sf.tx_type_search.use_skip_flag_prediction = 2;
    sf->tx_sf.use_intra_txb_hash = 1;

    // TODO(any): Refactor the code related to following winner mode speed
    // features
    sf->winner_mode_sf.enable_winner_mode_for_coeff_opt = 1;
    // TODO(any): Experiment with this speed feature by enabling for key frames
    sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch =
        intra_only ? 0 : 1;
    sf->winner_mode_sf.enable_winner_mode_for_use_tx_domain_dist = 1;
    // For a non-boosted frame, boosted_or_intnl_arf is non-zero exactly when
    // the current update type is INTNL_ARF_UPDATE.
    if (is_boosted) {
      sf->winner_mode_sf.motion_mode_for_winner_cand = 0;
    } else {
      sf->winner_mode_sf.motion_mode_for_winner_cand =
          boosted_or_intnl_arf ? 1 : 2;
    }

    // TODO(any): evaluate if these lpf features can be moved to speed 2.
    // For screen content, "prune_sgr_based_on_wiener = 2" cause large quality
    // loss.
    sf->lpf_sf.prune_sgr_based_on_wiener = sc_tools ? 1 : 2;
    sf->lpf_sf.disable_loop_restoration_chroma = is_boosted ? 0 : 1;
    sf->lpf_sf.reduce_wiener_window_size = !is_boosted;
    sf->lpf_sf.prune_wiener_based_on_src_var = 2;
  }

  // ---- Speed 4 ----
  if (speed >= 4) {
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;

    sf->part_sf.simple_motion_search_prune_agg = 2;
    sf->part_sf.simple_motion_search_reduce_search_steps = 4;
    sf->part_sf.prune_ext_part_using_split_info = 2;
    sf->part_sf.early_term_after_none_split = 1;
    sf->part_sf.ml_predict_breakout_level = 3;

    sf->inter_sf.alt_ref_search_fp = 1;
    sf->inter_sf.txfm_rd_gate_level = is_boosted ? 0 : 4;

    sf->inter_sf.prune_inter_modes_based_on_tpl = is_boosted ? 0 : 2;
    sf->inter_sf.prune_compound_using_neighbors = 2;
    sf->inter_sf.prune_obmc_prob_thresh = INT_MAX;
    sf->inter_sf.disable_onesided_comp = 1;

    sf->interp_sf.cb_pred_filter_search = 1;
    sf->interp_sf.skip_sharp_interp_filter_search = 1;
    sf->interp_sf.use_interp_filter = 2;
    sf->interp_sf.adaptive_interp_filter_search = 2;

    sf->intra_sf.intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL;
    sf->intra_sf.intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL;
    sf->intra_sf.intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL;
    sf->intra_sf.intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
    sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
    sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
    // TODO(any): Experiment with this speed feature set to 2 for higher quality
    // presets as well
    sf->intra_sf.skip_intra_in_interframe = 2;

    sf->mv_sf.simple_motion_subpel_force_stop = HALF_PEL;

    sf->tpl_sf.prune_starting_mv = 2;
    sf->tpl_sf.subpel_force_stop = HALF_PEL;
    sf->tpl_sf.search_method = FAST_BIGDIA;

    sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 1;
    sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;
    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;
    sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 1;
    // TODO(any): Experiment with enabling of this speed feature as hash state
    // is reset during winner mode processing
    sf->tx_sf.use_intra_txb_hash = 0;

    sf->rd_sf.perform_coeff_opt = boosted_or_intnl_arf ? 5 : 7;
    sf->rd_sf.tx_domain_dist_thres_level = 2;

    // TODO(any): Extend multi-winner mode processing support for inter frames
    if (intra_only) {
      sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_DEFAULT;
    } else {
      sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;
    }
    sf->winner_mode_sf.enable_winner_mode_for_tx_size_srch = 1;

    sf->lpf_sf.lpf_pick = LPF_PICK_FROM_FULL_IMAGE_NON_DUAL;
    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL3;

    sf->mv_sf.reduce_search_range = 1;
  }

  // ---- Speed 5 ----
  if (speed >= 5) {
    sf->part_sf.simple_motion_search_prune_agg = 3;
    sf->part_sf.ext_partition_eval_thresh = sc_tools ? BLOCK_8X8 : BLOCK_16X16;

    sf->inter_sf.disable_interinter_wedge_var_thresh = UINT_MAX;
    sf->inter_sf.prune_inter_modes_if_skippable = 1;
    sf->inter_sf.txfm_rd_gate_level = is_boosted ? 0 : 5;
    // Enable fast search for all valid compound modes.
    sf->inter_sf.enable_fast_compound_mode_search = 2;

    sf->intra_sf.chroma_intra_pruning_with_hog = 3;

    // TODO(any): Extend multi-winner mode processing support for inter frames
    if (intra_only) {
      sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_FAST;
    } else {
      sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;
    }

    sf->lpf_sf.use_coarse_filter_level_search = intra_only ? 0 : 1;
    sf->lpf_sf.disable_lr_filter = 1;

    sf->mv_sf.prune_mesh_search = 1;

    sf->tpl_sf.prune_starting_mv = 3;

    sf->winner_mode_sf.dc_blk_pred_level = 1;
  }

  // ---- Speed 6 ----
  if (speed >= 6) {
    sf->hl_sf.disable_extra_sc_testing = 1;
    sf->hl_sf.second_alt_ref_filtering = 0;
    sf->hl_sf.recode_tolerance = 55;

    sf->inter_sf.prune_inter_modes_based_on_tpl = is_boosted ? 0 : 3;
    sf->inter_sf.prune_nearmv_using_neighbors = 1;

    sf->intra_sf.chroma_intra_pruning_with_hog = 4;
    sf->intra_sf.intra_pruning_with_hog = 4;

    sf->part_sf.prune_rectangular_split_based_on_qidx =
        (is_boosted || sc_tools) ? 0 : 1;
    if (sc_tools) {
      sf->part_sf.prune_sub_8x8_partition_level = 0;
    } else {
      sf->part_sf.prune_sub_8x8_partition_level = intra_only ? 1 : 2;
    }
    sf->part_sf.prune_part4_search = 3;

    sf->mv_sf.simple_motion_subpel_force_stop = FULL_PEL;
    sf->mv_sf.use_bsize_dependent_search_method = 1;

    sf->tpl_sf.disable_gop_length_decision = 1;
    sf->tpl_sf.subpel_force_stop = FULL_PEL;
    sf->tpl_sf.disable_filtered_key_tpl = 1;

    sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2;
    sf->tx_sf.use_intra_txb_hash = 1;
    sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 0;

    sf->rd_sf.perform_coeff_opt = boosted_or_intnl_arf ? 6 : 8;

    sf->winner_mode_sf.dc_blk_pred_level = 2;
    sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;

    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4;
  }

  // Intra txb hash is currently not compatible with multi-winner mode as the
  // hashes got reset during multi-winner mode processing.
  assert(IMPLIES(
      sf->winner_mode_sf.multi_winner_mode_type != MULTI_WINNER_MODE_OFF,
      !sf->tx_sf.use_intra_txb_hash));
}
735 
// Sets the frame-size-independent speed features used for real-time encoding.
//
// The function is a cascade. The unconditional assignments at the top form the
// speed 0 baseline, and each `if (speed >= N)` block that follows overrides or
// extends the values chosen by the lower levels. Statement order therefore
// matters: several fields are assigned more than once and the last assignment
// that executes wins.
//
// Parameters:
//   cpi   - encoder instance. Read for the boosted-frame status, frame type,
//           frame dimensions, rate control mode and SVC layer state.
//   sf    - speed feature set to update in place. The caller visible in this
//           file, av1_set_speed_features_framesize_independent(), runs the
//           init_*_sf() helpers on it first.
//   speed - requested speed level; every block whose threshold is <= speed is
//           applied.
//
// TODO(kyslov): now this is very similar to
// set_good_speed_features_framesize_independent
//               except it sets non-rd flag on speed8. This function will likely
//               be modified in the future with RT-specific speed features
// NOTE(review): in the code below use_nonrd_pick_mode is enabled in the
// speed >= 7 block, not at speed 8 - confirm which one the TODO refers to.
static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
                                                        SPEED_FEATURES *sf,
                                                        int speed) {
  AV1_COMMON *const cm = &cpi->common;
  // Result of frame_is_boosted(); used below to relax a few settings.
  const int boosted = frame_is_boosted(cpi);

  // Speed 0 for all speed features that give neutral coding performance change.
  sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3;

  sf->part_sf.less_rectangular_check_level = 1;
  sf->part_sf.ml_prune_partition = 1;
  sf->part_sf.prune_ext_partition_types_search_level = 1;

  // TODO(debargha): Test, tweak and turn on either 1 or 2
  sf->inter_sf.inter_mode_rd_model_estimation = 0;
  sf->inter_sf.disable_interintra_wedge_var_thresh = 0;
  sf->inter_sf.disable_interinter_wedge_var_thresh = 0;
  sf->inter_sf.model_based_post_interp_filter_breakout = 1;
  sf->inter_sf.prune_compound_using_single_ref = 0;
  sf->inter_sf.prune_mode_search_simple_translation = 1;
  sf->inter_sf.prune_ref_frame_for_rect_partitions = !boosted;
  sf->inter_sf.reduce_inter_modes = 1;
  sf->inter_sf.selective_ref_frame = 1;
  sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_SKIP_MV_SEARCH;

  sf->interp_sf.use_fast_interpolation_filter_search = 1;

  sf->intra_sf.intra_pruning_with_hog = 1;

  sf->mv_sf.full_pixel_search_level = 1;
  sf->mv_sf.exhaustive_searches_thresh = INT_MAX;

  // Baseline values for the real-time-only features. Most of these are
  // revisited by the higher speed blocks further down.
  sf->rt_sf.check_intra_pred_nonrd = 1;
  sf->rt_sf.estimate_motion_for_var_based_partition = 1;
  sf->rt_sf.hybrid_intra_pickmode = 0;
  sf->rt_sf.nonrd_prune_ref_frame_search = 0;
  sf->rt_sf.reuse_inter_pred_nonrd = 0;
  sf->rt_sf.use_comp_ref_nonrd = 1;
  sf->rt_sf.use_nonrd_filter_search = 1;
  sf->rt_sf.use_nonrd_pick_mode = 0;
  sf->rt_sf.use_real_time_ref_set = 0;
  sf->rt_sf.check_scene_detection = 0;
  sf->rt_sf.overshoot_detection_cbr = NO_DETECTION;
  sf->tx_sf.adaptive_txb_search_level = 1;
  sf->tx_sf.intra_tx_size_search_init_depth_sqr = 1;
  sf->tx_sf.model_based_prune_tx_search_level = 1;
  sf->tx_sf.tx_type_search.use_reduced_intra_txset = 1;
  sf->rt_sf.fullpel_search_step_param = 0;
  sf->rt_sf.skip_loopfilter_non_reference = 0;

  sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_SOLO;

  if (speed >= 1) {
    sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2;

    sf->part_sf.prune_ext_partition_types_search_level = 2;
    sf->part_sf.simple_motion_search_prune_rect = 1;

    sf->mv_sf.obmc_full_pixel_search_level = 1;
    sf->mv_sf.use_accurate_subpel_search = USE_4_TAPS;

    sf->inter_sf.prune_comp_search_by_single_result = 1;
    sf->inter_sf.reuse_inter_intra_mode = 1;
    sf->inter_sf.selective_ref_frame = 2;
    sf->inter_sf.skip_repeated_newmv = 1;
    // Same values as the speed 0 baseline above; restated for this level.
    sf->inter_sf.disable_interintra_wedge_var_thresh = 0;
    sf->inter_sf.disable_interinter_wedge_var_thresh = 0;
    sf->inter_sf.prune_comp_type_by_comp_avg = 1;

    sf->interp_sf.cb_pred_filter_search = 1;
    sf->interp_sf.use_interp_filter = 1;

    sf->tx_sf.adaptive_txb_search_level = 2;
    sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
    sf->tx_sf.tx_size_search_lgr_block = 1;
    sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
    sf->tx_sf.tx_type_search.skip_tx_search = 1;
    sf->tx_sf.use_intra_txb_hash = 1;

    sf->rd_sf.tx_domain_dist_level = boosted ? 0 : 1;
    sf->rd_sf.tx_domain_dist_thres_level = 1;

    sf->lpf_sf.dual_sgr_penalty_level = 1;
  }

  if (speed >= 2) {
    sf->part_sf.allow_partition_search_skip = 1;
    sf->part_sf.partition_search_breakout_rate_thr = 80;

    sf->mv_sf.auto_mv_step_size = 1;
    sf->mv_sf.subpel_iters_per_step = 1;

    sf->inter_sf.adaptive_rd_thresh = 1;
    sf->inter_sf.comp_inter_joint_search_thresh = BLOCK_SIZES_ALL;
    sf->inter_sf.disable_interintra_wedge_var_thresh = 100;
    sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
    sf->inter_sf.fast_wedge_sign_estimate = 1;
    sf->inter_sf.prune_comp_type_by_comp_avg = 2;
    sf->inter_sf.selective_ref_frame = 3;
    sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;

    sf->interp_sf.adaptive_interp_filter_search = 1;
    // Reverts the value that the speed >= 1 block set to 1.
    sf->interp_sf.cb_pred_filter_search = 0;
    sf->interp_sf.disable_dual_filter = 1;

    sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
    sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
    // Reverts the baseline value of 1.
    sf->tx_sf.model_based_prune_tx_search_level = 0;

    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL1;
  }

  if (speed >= 3) {
    sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;

    sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH;

    sf->part_sf.less_rectangular_check_level = 2;

    sf->mv_sf.use_accurate_subpel_search = USE_2_TAPS;
    // adaptive_motion_search breaks encoder multi-thread tests.
    // The values in x->pred_mv[] differ for single and multi-thread cases.
    // See aomedia:1778.
    // sf->mv_sf.adaptive_motion_search = 1;

    sf->inter_sf.adaptive_rd_thresh = 2;
    sf->inter_sf.mv_cost_upd_level = 1;
    // TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine
    // it with cpi->sf.disable_wedge_search_var_thresh.
    sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
    sf->inter_sf.prune_comp_search_by_single_result = 2;
    sf->inter_sf.selective_ref_frame = 4;

    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2;

    // From this level on the value no longer depends on `boosted`.
    sf->rd_sf.tx_domain_dist_level = 1;

    sf->winner_mode_sf.tx_size_search_level = boosted ? 0 : 2;
  }

  if (speed >= 4) {
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;

    sf->inter_sf.alt_ref_search_fp = 1;

    sf->interp_sf.skip_sharp_interp_filter_search = 1;

    sf->tx_sf.tx_type_search.fast_inter_tx_type_search = 1;
    sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;
    // Turns off the intra txb hash that the speed >= 1 block enabled.
    sf->tx_sf.use_intra_txb_hash = 0;

    sf->rd_sf.use_mb_rd_hash = 0;

    // Replaces the `boosted` based choice made in the speed >= 3 block.
    sf->winner_mode_sf.tx_size_search_level = frame_is_intra_only(cm) ? 0 : 2;
  }

  if (speed >= 5) {
    sf->inter_sf.adaptive_rd_thresh = 4;

    sf->rd_sf.tx_domain_dist_level = 2;
    sf->rd_sf.tx_domain_dist_thres_level = 2;
    sf->winner_mode_sf.tx_size_search_level = 1;

    // Key frames start with no skip flags; every other frame type gets the
    // full set listed here.
    sf->rt_sf.mode_search_skip_flags =
        (cm->current_frame.frame_type == KEY_FRAME)
            ? 0
            : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
                  FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR |
                  FLAG_EARLY_TERMINATE;
    sf->hl_sf.frame_parameter_update = 0;

    // Partitioning switches from the search default to the variance based
    // method from this level on.
    sf->part_sf.default_max_partition_size = BLOCK_128X128;
    sf->part_sf.default_min_partition_size = BLOCK_8X8;
    sf->part_sf.max_intra_bsize = BLOCK_32X32;
    sf->part_sf.partition_search_breakout_rate_thr = 500;
    sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
    sf->part_sf.adjust_var_based_rd_partitioning = 2;

    sf->mv_sf.search_method = FAST_DIAMOND;
    sf->mv_sf.subpel_force_stop = QUARTER_PEL;
    sf->mv_sf.use_fullpel_costlist = 1;
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;

    sf->inter_sf.inter_mode_rd_model_estimation = 2;

    // Restrict the intra luma/chroma mode masks for every transform size.
    for (int i = 0; i < TX_SIZES; ++i) {
      sf->intra_sf.intra_y_mode_mask[i] = INTRA_DC;
      sf->intra_sf.intra_uv_mode_mask[i] = UV_INTRA_DC_CFL;
    }

    sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;
    sf->tx_sf.use_inter_txb_hash = 0;
    sf->tx_sf.refine_fast_tx_search_results = 0;

    sf->rd_sf.optimize_coefficients = NO_TRELLIS_OPT;
    sf->rd_sf.simple_model_rd_from_var = 1;

    sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4;
    sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;

    // ORs the flag into the value chosen above, so after this line
    // FLAG_SKIP_INTRA_DIRMISMATCH is set on key frames as well.
    sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
    sf->rt_sf.num_inter_modes_for_tx_search = 5;
    sf->rt_sf.skip_interp_filter_search = 1;
    sf->rt_sf.use_comp_ref_nonrd = 0;
    sf->rt_sf.use_real_time_ref_set = 1;
    sf->rt_sf.use_simple_rd_model = 1;

    sf->rt_sf.check_scene_detection = 1;
    // Overshoot detection is only switched on for non-key frames when the
    // rate control mode is CBR.
    if (cm->current_frame.frame_type != KEY_FRAME &&
        cpi->oxcf.rc_cfg.mode == AOM_CBR)
      sf->rt_sf.overshoot_detection_cbr = FAST_DETECTION_MAXQ;
    // Enable noise estimation only for high resolutions for now.
    if (cm->width * cm->height > 640 * 480)
      sf->rt_sf.use_temporal_noise_estimate = 1;
  }

  if (speed >= 6) {
    sf->part_sf.adjust_var_based_rd_partitioning = 1;
    // The speed >= 7 block tests for this value before enabling
    // skip_loopfilter_non_reference.
    sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
  }

  if (speed >= 7) {
    sf->part_sf.default_max_partition_size = BLOCK_128X128;
    sf->part_sf.default_min_partition_size = BLOCK_8X8;
    sf->part_sf.partition_search_type = VAR_BASED_PARTITION;

    sf->mv_sf.search_method = FAST_DIAMOND;
    sf->mv_sf.subpel_force_stop = QUARTER_PEL;
    // Replaces SUBPEL_TREE_PRUNED_MORE chosen in the speed >= 5 block.
    sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;

    sf->inter_sf.inter_mode_rd_model_estimation = 2;

    sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;

    sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
    sf->rt_sf.nonrd_prune_ref_frame_search = 1;
    sf->rt_sf.reuse_inter_pred_nonrd = 0;
    sf->rt_sf.short_circuit_low_temp_var = 0;
    // Re-enables the interpolation filter search that speed >= 5 skipped.
    sf->rt_sf.skip_interp_filter_search = 0;
    sf->rt_sf.use_comp_ref_nonrd = 0;
    // For spatial layers, only LAST and GOLDEN are currently used in the SVC
    // for nonrd. The flag use_nonrd_altref_frame can disable GOLDEN in the
    // get_ref_frame_flags() for some patterns, so disable it here for
    // spatial layers.
    sf->rt_sf.use_nonrd_altref_frame =
        (cpi->svc.number_spatial_layers > 1) ? 0 : 1;
    // First level at which the non-RD pick mode flag is turned on.
    sf->rt_sf.use_nonrd_pick_mode = 1;
    sf->rt_sf.nonrd_check_partition_merge_mode = 1;
    sf->rt_sf.nonrd_check_partition_split = 0;
    sf->rt_sf.hybrid_intra_pickmode = 1;
    sf->rt_sf.skip_intra_pred_if_tx_skip = 1;
    // For SVC: use better mv search on base temporal layer, and only
    // on base spatial layer if highest resolution is above 640x360.
    if (cpi->svc.number_temporal_layers > 1) {
      if (cpi->svc.temporal_layer_id == 0 &&
          (cpi->svc.spatial_layer_id == 0 ||
           cpi->oxcf.frm_dim_cfg.width * cpi->oxcf.frm_dim_cfg.height <=
               640 * 360)) {
        sf->mv_sf.search_method = NSTEP;
        sf->mv_sf.subpel_search_method = SUBPEL_TREE;
        sf->rt_sf.fullpel_search_step_param = 6;
      } else if (cpi->svc.non_reference_frame) {
        sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
        sf->rt_sf.fullpel_search_step_param = 10;
      }
    }
    // TODO(marpan): Look into why enabling skip_loopfilter_non_reference is
    // not bitexact on rtc testset, its very close (< ~0.01 bdrate), but not
    // always bitexact.
    // Only taken for SVC non-reference frames, and only when both the CDEF
    // and loop filter picks are the *_FROM_Q variants set above.
    if (cpi->use_svc && cpi->svc.non_reference_frame &&
        sf->lpf_sf.cdef_pick_method == CDEF_PICK_FROM_Q &&
        sf->lpf_sf.lpf_pick == LPF_PICK_FROM_Q)
      sf->rt_sf.skip_loopfilter_non_reference = 1;
    // Set mask for intra modes.
    for (int i = 0; i < BLOCK_SIZES; ++i)
      if (i >= BLOCK_32X32)
        sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
      else
        // Use DC, H, V intra mode for block sizes < 32X32.
        sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V;
  }

  if (speed >= 8) {
    // Same value as the speed 0 baseline; restated for this level.
    sf->rt_sf.estimate_motion_for_var_based_partition = 1;
    sf->rt_sf.short_circuit_low_temp_var = 1;
    // Inter prediction reuse is only enabled when the temporal denoiser is
    // compiled out.
#if !CONFIG_AV1_TEMPORAL_DENOISING
    sf->rt_sf.reuse_inter_pred_nonrd = 1;
#endif
    // The next four lines override the choices of the speed >= 7 block.
    sf->rt_sf.use_nonrd_altref_frame = 0;
    sf->rt_sf.nonrd_prune_ref_frame_search = 2;
    sf->rt_sf.nonrd_check_partition_merge_mode = 0;
    sf->rt_sf.nonrd_check_partition_split = 0;
    sf->rt_sf.use_modeled_non_rd_cost = 1;
    sf->rt_sf.source_metrics_sb_nonrd = 1;
    sf->rt_sf.skip_intra_pred_if_tx_skip = 0;
    sf->interp_sf.cb_pred_filter_search = 1;
  }
  if (speed >= 9) {
    sf->rt_sf.estimate_motion_for_var_based_partition = 0;
    sf->rt_sf.force_large_partition_blocks = 1;
    // Replaces the per-size mask from the speed >= 7 block: every block size
    // is now limited to INTRA_DC.
    for (int i = 0; i < BLOCK_SIZES; ++i)
      sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
  }
}
1044 
init_hl_sf(HIGH_LEVEL_SPEED_FEATURES * hl_sf)1045 static AOM_INLINE void init_hl_sf(HIGH_LEVEL_SPEED_FEATURES *hl_sf) {
1046   // best quality defaults
1047   hl_sf->frame_parameter_update = 1;
1048   hl_sf->recode_loop = ALLOW_RECODE;
1049   // Recode loop tolerance %.
1050   hl_sf->recode_tolerance = 25;
1051   hl_sf->high_precision_mv_usage = CURRENT_Q;
1052   hl_sf->superres_auto_search_type = SUPERRES_AUTO_ALL;
1053   hl_sf->disable_extra_sc_testing = 0;
1054   hl_sf->second_alt_ref_filtering = 1;
1055 }
1056 
init_tpl_sf(TPL_SPEED_FEATURES * tpl_sf)1057 static AOM_INLINE void init_tpl_sf(TPL_SPEED_FEATURES *tpl_sf) {
1058   tpl_sf->disable_gop_length_decision = 0;
1059   tpl_sf->prune_intra_modes = 0;
1060   tpl_sf->prune_starting_mv = 0;
1061   tpl_sf->reduce_first_step_size = 0;
1062   tpl_sf->skip_alike_starting_mv = 0;
1063   tpl_sf->subpel_force_stop = EIGHTH_PEL;
1064   tpl_sf->search_method = NSTEP;
1065   tpl_sf->disable_filtered_key_tpl = 0;
1066   tpl_sf->prune_ref_frames_in_tpl = 0;
1067   tpl_sf->allow_compound_pred = 1;
1068 }
1069 
init_gm_sf(GLOBAL_MOTION_SPEED_FEATURES * gm_sf)1070 static AOM_INLINE void init_gm_sf(GLOBAL_MOTION_SPEED_FEATURES *gm_sf) {
1071   gm_sf->gm_search_type = GM_FULL_SEARCH;
1072   gm_sf->prune_ref_frame_for_gm_search = 0;
1073 }
1074 
init_part_sf(PARTITION_SPEED_FEATURES * part_sf)1075 static AOM_INLINE void init_part_sf(PARTITION_SPEED_FEATURES *part_sf) {
1076   part_sf->partition_search_type = SEARCH_PARTITION;
1077   part_sf->less_rectangular_check_level = 0;
1078   part_sf->use_square_partition_only_threshold = BLOCK_128X128;
1079   part_sf->auto_max_partition_based_on_simple_motion = NOT_IN_USE;
1080   part_sf->default_max_partition_size = BLOCK_LARGEST;
1081   part_sf->default_min_partition_size = BLOCK_4X4;
1082   part_sf->adjust_var_based_rd_partitioning = 0;
1083   part_sf->allow_partition_search_skip = 0;
1084   part_sf->max_intra_bsize = BLOCK_LARGEST;
1085   // This setting only takes effect when partition_search_type is set
1086   // to FIXED_PARTITION.
1087   part_sf->fixed_partition_size = BLOCK_16X16;
1088   // Recode loop tolerance %.
1089   part_sf->partition_search_breakout_dist_thr = 0;
1090   part_sf->partition_search_breakout_rate_thr = 0;
1091   part_sf->prune_ext_partition_types_search_level = 0;
1092   part_sf->prune_part4_search = 0;
1093   part_sf->ml_prune_partition = 0;
1094   part_sf->ml_early_term_after_part_split_level = 0;
1095   for (int i = 0; i < PARTITION_BLOCK_SIZES; ++i) {
1096     part_sf->ml_partition_search_breakout_thresh[i] =
1097         -1;  // -1 means not enabled.
1098   }
1099   part_sf->simple_motion_search_prune_agg = 0;
1100   part_sf->simple_motion_search_split = 0;
1101   part_sf->simple_motion_search_prune_rect = 0;
1102   part_sf->simple_motion_search_early_term_none = 0;
1103   part_sf->simple_motion_search_reduce_search_steps = 0;
1104   part_sf->intra_cnn_split = 0;
1105   part_sf->ext_partition_eval_thresh = BLOCK_8X8;
1106   part_sf->prune_ext_part_using_split_info = 0;
1107   part_sf->prune_rectangular_split_based_on_qidx = 0;
1108   part_sf->early_term_after_none_split = 0;
1109   part_sf->ml_predict_breakout_level = 0;
1110   part_sf->prune_sub_8x8_partition_level = 0;
1111 }
1112 
init_mv_sf(MV_SPEED_FEATURES * mv_sf)1113 static AOM_INLINE void init_mv_sf(MV_SPEED_FEATURES *mv_sf) {
1114   mv_sf->full_pixel_search_level = 0;
1115   mv_sf->auto_mv_step_size = 0;
1116   mv_sf->exhaustive_searches_thresh = 0;
1117   mv_sf->obmc_full_pixel_search_level = 0;
1118   mv_sf->prune_mesh_search = 0;
1119   mv_sf->reduce_search_range = 0;
1120   mv_sf->search_method = NSTEP;
1121   mv_sf->simple_motion_subpel_force_stop = EIGHTH_PEL;
1122   mv_sf->subpel_force_stop = EIGHTH_PEL;
1123   mv_sf->subpel_iters_per_step = 2;
1124   mv_sf->subpel_search_method = SUBPEL_TREE;
1125   mv_sf->use_accurate_subpel_search = USE_8_TAPS;
1126   mv_sf->use_bsize_dependent_search_method = 0;
1127   mv_sf->use_fullpel_costlist = 0;
1128   mv_sf->use_downsampled_sad = 0;
1129   mv_sf->disable_extensive_joint_motion_search = 0;
1130 }
1131 
init_inter_sf(INTER_MODE_SPEED_FEATURES * inter_sf)1132 static AOM_INLINE void init_inter_sf(INTER_MODE_SPEED_FEATURES *inter_sf) {
1133   inter_sf->comp_inter_joint_search_thresh = BLOCK_4X4;
1134   inter_sf->adaptive_rd_thresh = 0;
1135   inter_sf->model_based_post_interp_filter_breakout = 0;
1136   inter_sf->reduce_inter_modes = 0;
1137   inter_sf->alt_ref_search_fp = 0;
1138   inter_sf->selective_ref_frame = 0;
1139   inter_sf->prune_ref_frame_for_rect_partitions = 0;
1140   inter_sf->fast_wedge_sign_estimate = 0;
1141   inter_sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_ENABLED;
1142   inter_sf->reuse_inter_intra_mode = 0;
1143   inter_sf->mv_cost_upd_level = 0;
1144   inter_sf->prune_inter_modes_based_on_tpl = 0;
1145   inter_sf->prune_nearmv_using_neighbors = 0;
1146   inter_sf->prune_comp_search_by_single_result = 0;
1147   inter_sf->skip_repeated_ref_mv = 0;
1148   inter_sf->skip_repeated_newmv = 0;
1149   inter_sf->skip_repeated_full_newmv = 0;
1150   inter_sf->inter_mode_rd_model_estimation = 0;
1151   inter_sf->prune_compound_using_single_ref = 0;
1152   inter_sf->prune_compound_using_neighbors = 0;
1153   inter_sf->prune_comp_using_best_single_mode_ref = 0;
1154   inter_sf->disable_onesided_comp = 0;
1155   inter_sf->prune_mode_search_simple_translation = 0;
1156   inter_sf->prune_comp_type_by_comp_avg = 0;
1157   inter_sf->disable_interinter_wedge_newmv_search = 0;
1158   inter_sf->fast_interintra_wedge_search = 0;
1159   inter_sf->prune_comp_type_by_model_rd = 0;
1160   inter_sf->perform_best_rd_based_gating_for_chroma = 0;
1161   inter_sf->prune_obmc_prob_thresh = 0;
1162   inter_sf->disable_interinter_wedge_var_thresh = 0;
1163   inter_sf->disable_interintra_wedge_var_thresh = 0;
1164   inter_sf->prune_ref_mv_idx_search = 0;
1165   inter_sf->prune_warped_prob_thresh = 0;
1166   inter_sf->reuse_compound_type_decision = 0;
1167   inter_sf->txfm_rd_gate_level = 0;
1168   inter_sf->prune_inter_modes_if_skippable = 0;
1169   inter_sf->disable_masked_comp = 0;
1170   inter_sf->reuse_best_prediction_for_part_ab = 0;
1171   inter_sf->enable_fast_compound_mode_search = 0;
1172 }
1173 
init_interp_sf(INTERP_FILTER_SPEED_FEATURES * interp_sf)1174 static AOM_INLINE void init_interp_sf(INTERP_FILTER_SPEED_FEATURES *interp_sf) {
1175   interp_sf->adaptive_interp_filter_search = 0;
1176   interp_sf->cb_pred_filter_search = 0;
1177   interp_sf->disable_dual_filter = 0;
1178   interp_sf->skip_sharp_interp_filter_search = 0;
1179   interp_sf->use_fast_interpolation_filter_search = 0;
1180   interp_sf->use_interp_filter = 0;
1181 }
1182 
init_intra_sf(INTRA_MODE_SPEED_FEATURES * intra_sf)1183 static AOM_INLINE void init_intra_sf(INTRA_MODE_SPEED_FEATURES *intra_sf) {
1184   intra_sf->chroma_intra_pruning_with_hog = 0;
1185   intra_sf->skip_intra_in_interframe = 1;
1186   intra_sf->intra_pruning_with_hog = 0;
1187   intra_sf->prune_palette_search_level = 0;
1188 
1189   for (int i = 0; i < TX_SIZES; i++) {
1190     intra_sf->intra_y_mode_mask[i] = INTRA_ALL;
1191     intra_sf->intra_uv_mode_mask[i] = UV_INTRA_ALL;
1192   }
1193   intra_sf->disable_smooth_intra = 0;
1194 }
1195 
init_tx_sf(TX_SPEED_FEATURES * tx_sf)1196 static AOM_INLINE void init_tx_sf(TX_SPEED_FEATURES *tx_sf) {
1197   tx_sf->inter_tx_size_search_init_depth_sqr = 0;
1198   tx_sf->inter_tx_size_search_init_depth_rect = 0;
1199   tx_sf->intra_tx_size_search_init_depth_rect = 0;
1200   tx_sf->intra_tx_size_search_init_depth_sqr = 0;
1201   tx_sf->tx_size_search_lgr_block = 0;
1202   tx_sf->model_based_prune_tx_search_level = 0;
1203   tx_sf->tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_1;
1204   tx_sf->tx_type_search.ml_tx_split_thresh = 8500;
1205   tx_sf->tx_type_search.use_skip_flag_prediction = 1;
1206   tx_sf->tx_type_search.use_reduced_intra_txset = 0;
1207   tx_sf->tx_type_search.fast_intra_tx_type_search = 0;
1208   tx_sf->tx_type_search.fast_inter_tx_type_search = 0;
1209   tx_sf->tx_type_search.skip_tx_search = 0;
1210   tx_sf->tx_type_search.prune_tx_type_using_stats = 0;
1211   tx_sf->tx_type_search.prune_tx_type_est_rd = 0;
1212   tx_sf->tx_type_search.winner_mode_tx_type_pruning = 0;
1213   tx_sf->txb_split_cap = 1;
1214   tx_sf->adaptive_txb_search_level = 0;
1215   tx_sf->use_intra_txb_hash = 0;
1216   tx_sf->use_inter_txb_hash = 1;
1217   tx_sf->refine_fast_tx_search_results = 1;
1218   tx_sf->prune_tx_size_level = 0;
1219 }
1220 
init_rd_sf(RD_CALC_SPEED_FEATURES * rd_sf,const AV1EncoderConfig * oxcf)1221 static AOM_INLINE void init_rd_sf(RD_CALC_SPEED_FEATURES *rd_sf,
1222                                   const AV1EncoderConfig *oxcf) {
1223   const int disable_trellis_quant = oxcf->algo_cfg.disable_trellis_quant;
1224   if (disable_trellis_quant == 3) {
1225     rd_sf->optimize_coefficients = !is_lossless_requested(&oxcf->rc_cfg)
1226                                        ? NO_ESTIMATE_YRD_TRELLIS_OPT
1227                                        : NO_TRELLIS_OPT;
1228   } else if (disable_trellis_quant == 2) {
1229     rd_sf->optimize_coefficients = !is_lossless_requested(&oxcf->rc_cfg)
1230                                        ? FINAL_PASS_TRELLIS_OPT
1231                                        : NO_TRELLIS_OPT;
1232   } else if (disable_trellis_quant == 0) {
1233     if (is_lossless_requested(&oxcf->rc_cfg)) {
1234       rd_sf->optimize_coefficients = NO_TRELLIS_OPT;
1235     } else {
1236       rd_sf->optimize_coefficients = FULL_TRELLIS_OPT;
1237     }
1238   } else if (disable_trellis_quant == 1) {
1239     rd_sf->optimize_coefficients = NO_TRELLIS_OPT;
1240   } else {
1241     assert(0 && "Invalid disable_trellis_quant value");
1242   }
1243   rd_sf->use_mb_rd_hash = 1;
1244   rd_sf->simple_model_rd_from_var = 0;
1245   rd_sf->tx_domain_dist_level = 0;
1246   rd_sf->tx_domain_dist_thres_level = 0;
1247   rd_sf->perform_coeff_opt = 0;
1248 }
1249 
init_winner_mode_sf(WINNER_MODE_SPEED_FEATURES * winner_mode_sf)1250 static AOM_INLINE void init_winner_mode_sf(
1251     WINNER_MODE_SPEED_FEATURES *winner_mode_sf) {
1252   winner_mode_sf->motion_mode_for_winner_cand = 0;
1253   // Set this at the appropriate speed levels
1254   winner_mode_sf->tx_size_search_level = USE_FULL_RD;
1255   winner_mode_sf->enable_winner_mode_for_coeff_opt = 0;
1256   winner_mode_sf->enable_winner_mode_for_tx_size_srch = 0;
1257   winner_mode_sf->enable_winner_mode_for_use_tx_domain_dist = 0;
1258   winner_mode_sf->multi_winner_mode_type = 0;
1259   winner_mode_sf->dc_blk_pred_level = 0;
1260 }
1261 
init_lpf_sf(LOOP_FILTER_SPEED_FEATURES * lpf_sf)1262 static AOM_INLINE void init_lpf_sf(LOOP_FILTER_SPEED_FEATURES *lpf_sf) {
1263   lpf_sf->disable_loop_restoration_chroma = 0;
1264   lpf_sf->prune_wiener_based_on_src_var = 0;
1265   lpf_sf->prune_sgr_based_on_wiener = 0;
1266   lpf_sf->enable_sgr_ep_pruning = 0;
1267   lpf_sf->reduce_wiener_window_size = 0;
1268   lpf_sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE;
1269   lpf_sf->use_coarse_filter_level_search = 0;
1270   lpf_sf->cdef_pick_method = CDEF_FULL_SEARCH;
1271   // Set decoder side speed feature to use less dual sgr modes
1272   lpf_sf->dual_sgr_penalty_level = 0;
1273   lpf_sf->disable_lr_filter = 0;
1274 }
1275 
init_rt_sf(REAL_TIME_SPEED_FEATURES * rt_sf)1276 static AOM_INLINE void init_rt_sf(REAL_TIME_SPEED_FEATURES *rt_sf) {
1277   rt_sf->mode_search_skip_flags = 0;
1278   rt_sf->skip_interp_filter_search = 0;
1279   rt_sf->force_tx_search_off = 0;
1280   rt_sf->num_inter_modes_for_tx_search = INT_MAX;
1281   rt_sf->use_simple_rd_model = 0;
1282   rt_sf->nonrd_check_partition_merge_mode = 0;
1283   rt_sf->nonrd_check_partition_split = 0;
1284   rt_sf->skip_intra_pred_if_tx_skip = 0;
1285 }
1286 
// Applies the speed features that depend on the frame size, followed by the
// adjustments that have to be made on top of them.
//
//   cpi:   encoder instance whose speed features (cpi->sf) are updated.
//   speed: speed level forwarded to the mode specific setter.
void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) {
  const AV1EncoderConfig *const enc_cfg = &cpi->oxcf;
  SPEED_FEATURES *const speed_feats = &cpi->sf;

  // Dispatch to the setter for the configured encoding mode; any other mode
  // leaves the speed features as they are.
  switch (enc_cfg->mode) {
    case GOOD:
      set_good_speed_feature_framesize_dependent(cpi, speed_feats, speed);
      break;
    case REALTIME:
      set_rt_speed_feature_framesize_dependent(cpi, speed_feats, speed);
      break;
    default: break;
  }

  // While the sequence parameters are still unlocked, masked compound can be
  // switched off (never on) by the speed feature.
  if (!cpi->seq_params_locked) {
    const int masked_comp_allowed = !speed_feats->inter_sf.disable_masked_comp;
    cpi->common.seq_params.enable_masked_compound &= masked_comp_allowed;
  }

  // This is only used in motion vector unit test.
  switch (enc_cfg->unit_test_cfg.motion_vector_unit_test) {
    case 1:
      cpi->mv_search_params.find_fractional_mv_step =
          av1_return_max_sub_pixel_mv;
      break;
    case 2:
      cpi->mv_search_params.find_fractional_mv_step =
          av1_return_min_sub_pixel_mv;
      break;
    default: break;
  }

  // With row multi-threading on more than one thread, cap mv_cost_upd_level
  // at 1 (row level update).
  const int row_mt_active =
      (enc_cfg->row_mt == 1) && (enc_cfg->max_threads > 1);
  if (row_mt_active && speed_feats->inter_sf.mv_cost_upd_level > 1) {
    speed_feats->inter_sf.mv_cost_upd_level = 1;
  }
}
1315 
av1_set_speed_features_framesize_independent(AV1_COMP * cpi,int speed)1316 void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) {
1317   SPEED_FEATURES *const sf = &cpi->sf;
1318   WinnerModeParams *const winner_mode_params = &cpi->winner_mode_params;
1319   const AV1EncoderConfig *const oxcf = &cpi->oxcf;
1320   int i;
1321 
1322   init_hl_sf(&sf->hl_sf);
1323   init_tpl_sf(&sf->tpl_sf);
1324   init_gm_sf(&sf->gm_sf);
1325   init_part_sf(&sf->part_sf);
1326   init_mv_sf(&sf->mv_sf);
1327   init_inter_sf(&sf->inter_sf);
1328   init_interp_sf(&sf->interp_sf);
1329   init_intra_sf(&sf->intra_sf);
1330   init_tx_sf(&sf->tx_sf);
1331   init_rd_sf(&sf->rd_sf, oxcf);
1332   init_winner_mode_sf(&sf->winner_mode_sf);
1333   init_lpf_sf(&sf->lpf_sf);
1334   init_rt_sf(&sf->rt_sf);
1335 
1336   if (oxcf->mode == GOOD)
1337     set_good_speed_features_framesize_independent(cpi, sf, speed);
1338   else if (oxcf->mode == REALTIME)
1339     set_rt_speed_features_framesize_independent(cpi, sf, speed);
1340 
1341   if (!cpi->seq_params_locked) {
1342     cpi->common.seq_params.enable_dual_filter &=
1343         !sf->interp_sf.disable_dual_filter;
1344     cpi->common.seq_params.enable_restoration &= !sf->lpf_sf.disable_lr_filter;
1345 
1346     cpi->common.seq_params.enable_interintra_compound &=
1347         (sf->inter_sf.disable_interintra_wedge_var_thresh != UINT_MAX);
1348   }
1349 
1350   // TODO(any) Currently use_intra_txb_hash is enabled in speed 1,2 for
1351   // intra-only encoding (key_freq_max == 0). Experiment with this speed feature
1352   // by enabling for image encoding in speed 1 and 2.
1353   if (cpi->oxcf.kf_cfg.key_freq_max == 0 && speed >= 1 && speed <= 2) {
1354     sf->tx_sf.use_intra_txb_hash = 1;
1355   }
1356 
1357   // sf->part_sf.partition_search_breakout_dist_thr is set assuming max 64x64
1358   // blocks. Normalise this if the blocks are bigger.
1359   if (MAX_SB_SIZE_LOG2 > 6) {
1360     sf->part_sf.partition_search_breakout_dist_thr <<=
1361         2 * (MAX_SB_SIZE_LOG2 - 6);
1362   }
1363 
1364   const int mesh_speed = AOMMIN(speed, MAX_MESH_SPEED);
1365   for (i = 0; i < MAX_MESH_STEP; ++i) {
1366     sf->mv_sf.mesh_patterns[i].range =
1367         good_quality_mesh_patterns[mesh_speed][i].range;
1368     sf->mv_sf.mesh_patterns[i].interval =
1369         good_quality_mesh_patterns[mesh_speed][i].interval;
1370   }
1371 
1372   // Update the mesh pattern of exhaustive motion search for intraBC
1373   // Though intraBC mesh pattern is populated for all frame types, it is used
1374   // only for intra frames of screen contents
1375   for (i = 0; i < MAX_MESH_STEP; ++i) {
1376     sf->mv_sf.intrabc_mesh_patterns[i].range =
1377         intrabc_mesh_patterns[mesh_speed][i].range;
1378     sf->mv_sf.intrabc_mesh_patterns[i].interval =
1379         intrabc_mesh_patterns[mesh_speed][i].interval;
1380   }
1381 
1382   // Slow quant, dct and trellis not worthwhile for first pass
1383   // so make sure they are always turned off.
1384   if (is_stat_generation_stage(cpi))
1385     sf->rd_sf.optimize_coefficients = NO_TRELLIS_OPT;
1386 
1387   // No recode or trellis for 1 pass.
1388   if (oxcf->pass == 0 && has_no_stats_stage(cpi))
1389     sf->hl_sf.recode_loop = DISALLOW_RECODE;
1390 
1391   MotionVectorSearchParams *const mv_search_params = &cpi->mv_search_params;
1392   if (sf->mv_sf.subpel_search_method == SUBPEL_TREE) {
1393     mv_search_params->find_fractional_mv_step = av1_find_best_sub_pixel_tree;
1394   } else if (sf->mv_sf.subpel_search_method == SUBPEL_TREE_PRUNED) {
1395     mv_search_params->find_fractional_mv_step =
1396         av1_find_best_sub_pixel_tree_pruned;
1397   } else if (sf->mv_sf.subpel_search_method == SUBPEL_TREE_PRUNED_MORE) {
1398     mv_search_params->find_fractional_mv_step =
1399         av1_find_best_sub_pixel_tree_pruned_more;
1400   }
1401 
1402   // This is only used in motion vector unit test.
1403   if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test == 1)
1404     mv_search_params->find_fractional_mv_step = av1_return_max_sub_pixel_mv;
1405   else if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test == 2)
1406     mv_search_params->find_fractional_mv_step = av1_return_min_sub_pixel_mv;
1407 
1408   // assert ensures that tx_domain_dist_level is accessed correctly
1409   assert(cpi->sf.rd_sf.tx_domain_dist_thres_level >= 0 &&
1410          cpi->sf.rd_sf.tx_domain_dist_thres_level < 3);
1411   memcpy(winner_mode_params->tx_domain_dist_threshold,
1412          tx_domain_dist_thresholds[cpi->sf.rd_sf.tx_domain_dist_thres_level],
1413          sizeof(winner_mode_params->tx_domain_dist_threshold));
1414 
1415   assert(cpi->sf.rd_sf.tx_domain_dist_level >= 0 &&
1416          cpi->sf.rd_sf.tx_domain_dist_level < 3);
1417   memcpy(winner_mode_params->use_transform_domain_distortion,
1418          tx_domain_dist_types[cpi->sf.rd_sf.tx_domain_dist_level],
1419          sizeof(winner_mode_params->use_transform_domain_distortion));
1420 
1421   // assert ensures that coeff_opt_thresholds is accessed correctly
1422   assert(cpi->sf.rd_sf.perform_coeff_opt >= 0 &&
1423          cpi->sf.rd_sf.perform_coeff_opt < 9);
1424   memcpy(winner_mode_params->coeff_opt_thresholds,
1425          &coeff_opt_thresholds[cpi->sf.rd_sf.perform_coeff_opt],
1426          sizeof(winner_mode_params->coeff_opt_thresholds));
1427 
1428   // assert ensures that predict_skip_levels is accessed correctly
1429   assert(cpi->sf.tx_sf.tx_type_search.use_skip_flag_prediction >= 0 &&
1430          cpi->sf.tx_sf.tx_type_search.use_skip_flag_prediction < 3);
1431   memcpy(winner_mode_params->skip_txfm_level,
1432          predict_skip_levels[cpi->sf.tx_sf.tx_type_search
1433                                  .use_skip_flag_prediction],
1434          sizeof(winner_mode_params->skip_txfm_level));
1435 
1436   // assert ensures that tx_size_search_level is accessed correctly
1437   assert(cpi->sf.winner_mode_sf.tx_size_search_level >= 0 &&
1438          cpi->sf.winner_mode_sf.tx_size_search_level < 3);
1439   memcpy(winner_mode_params->tx_size_search_methods,
1440          tx_size_search_methods[cpi->sf.winner_mode_sf.tx_size_search_level],
1441          sizeof(winner_mode_params->tx_size_search_methods));
1442   memcpy(winner_mode_params->predict_dc_level,
1443          predict_dc_levels[cpi->sf.winner_mode_sf.dc_blk_pred_level],
1444          sizeof(winner_mode_params->predict_dc_level));
1445 
1446   if (cpi->oxcf.row_mt == 1 && (cpi->oxcf.max_threads > 1)) {
1447     if (sf->inter_sf.inter_mode_rd_model_estimation == 1) {
1448       // Revert to type 2
1449       sf->inter_sf.inter_mode_rd_model_estimation = 2;
1450     }
1451 
1452     // Disable the speed feature 'prune_ref_frame_for_gm_search' to achieve
1453     // better parallelism when number of threads available are greater than or
1454     // equal to maximum number of reference frames allowed for global motion.
1455     if (sf->gm_sf.gm_search_type != GM_DISABLE_SEARCH &&
1456         (cpi->oxcf.max_threads >=
1457          gm_available_reference_frames[sf->gm_sf.gm_search_type]))
1458       sf->gm_sf.prune_ref_frame_for_gm_search = 0;
1459   }
1460 }
1461 
1462 // Override some speed features based on qindex
av1_set_speed_features_qindex_dependent(AV1_COMP * cpi,int speed)1463 void av1_set_speed_features_qindex_dependent(AV1_COMP *cpi, int speed) {
1464   AV1_COMMON *const cm = &cpi->common;
1465   SPEED_FEATURES *const sf = &cpi->sf;
1466   WinnerModeParams *const winner_mode_params = &cpi->winner_mode_params;
1467   const int boosted = frame_is_boosted(cpi);
1468   const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
1469   const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080;
1470   const int is_arf2_bwd_type =
1471       cpi->gf_group.update_type[cpi->gf_group.index] == INTNL_ARF_UPDATE;
1472 
1473   if (cpi->oxcf.mode == GOOD && speed == 0) {
1474     // qindex_thresh for resolution < 720p
1475     const int qindex_thresh = boosted ? 70 : (is_arf2_bwd_type ? 110 : 140);
1476     if (!is_720p_or_larger && cm->quant_params.base_qindex <= qindex_thresh) {
1477       sf->inter_sf.skip_repeated_newmv = 1;
1478       sf->part_sf.simple_motion_search_split =
1479           cm->features.allow_screen_content_tools ? 1 : 2;
1480       sf->part_sf.simple_motion_search_early_term_none = 1;
1481       sf->tx_sf.model_based_prune_tx_search_level = 0;
1482     }
1483 
1484     if (is_720p_or_larger && cm->quant_params.base_qindex <= 128) {
1485       sf->rd_sf.perform_coeff_opt = 2 + is_1080p_or_larger;
1486       memcpy(winner_mode_params->coeff_opt_thresholds,
1487              &coeff_opt_thresholds[sf->rd_sf.perform_coeff_opt],
1488              sizeof(winner_mode_params->coeff_opt_thresholds));
1489       sf->part_sf.simple_motion_search_split =
1490           cm->features.allow_screen_content_tools ? 1 : 2;
1491       sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
1492       sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
1493       sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
1494       sf->inter_sf.skip_repeated_newmv = 1;
1495       sf->tx_sf.model_based_prune_tx_search_level = 0;
1496 
1497       if (is_1080p_or_larger && cm->quant_params.base_qindex <= 108) {
1498         sf->inter_sf.selective_ref_frame = 2;
1499         sf->rd_sf.tx_domain_dist_level = boosted ? 1 : 2;
1500         sf->rd_sf.tx_domain_dist_thres_level = 1;
1501         sf->part_sf.simple_motion_search_early_term_none = 1;
1502         sf->tx_sf.tx_type_search.ml_tx_split_thresh = 4000;
1503         sf->interp_sf.cb_pred_filter_search = 0;
1504         sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_2;
1505         sf->tx_sf.tx_type_search.skip_tx_search = 1;
1506         sf->tx_sf.use_intra_txb_hash = 1;
1507       }
1508     }
1509   }
1510 
1511   if (cpi->oxcf.mode == GOOD && speed >= 3) {
1512     // Disable extended partitions for lower quantizers
1513     const int qindex_thresh =
1514         cm->features.allow_screen_content_tools ? 50 : 100;
1515     if (cm->quant_params.base_qindex <= qindex_thresh && !boosted) {
1516       sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
1517     }
1518   }
1519 
1520   if (cpi->oxcf.mode == GOOD && speed >= 4) {
1521     // Disable extended partitions for lower quantizers
1522     const int qindex_thresh = boosted ? 80 : 120;
1523     if (cm->quant_params.base_qindex <= qindex_thresh &&
1524         !frame_is_intra_only(&cpi->common)) {
1525       sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
1526     }
1527   }
1528 
1529   if (cpi->oxcf.mode == GOOD && speed >= 5) {
1530     const int qindex_thresh = boosted ? 100 : 160;
1531     if (cm->quant_params.base_qindex <= qindex_thresh &&
1532         !frame_is_intra_only(&cpi->common)) {
1533       sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
1534     }
1535   }
1536 
1537   if (cpi->oxcf.mode == GOOD && (speed <= 2)) {
1538     if (!is_stat_generation_stage(cpi)) {
1539       // Use faster full-pel motion search for high quantizers.
1540       // Also use reduced total search range for low resolutions at high
1541       // quantizers.
1542       const int aggr = speed;
1543       const int qindex_thresh1 = ms_qindex_thresh[aggr][is_720p_or_larger][0];
1544       const int qindex_thresh2 = ms_qindex_thresh[aggr][is_720p_or_larger][1];
1545       const SEARCH_METHODS search_method =
1546           motion_search_method[is_720p_or_larger];
1547       if (cm->quant_params.base_qindex > qindex_thresh1) {
1548         sf->mv_sf.search_method = search_method;
1549         sf->tpl_sf.search_method = search_method;
1550       } else if (cm->quant_params.base_qindex > qindex_thresh2) {
1551         sf->mv_sf.search_method = NSTEP_8PT;
1552       }
1553     }
1554   }
1555 }
1556