1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <assert.h>
13 #include <math.h>
14 #include <stdbool.h>
15
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/av1_rtcd.h"
19
20 #include "aom_dsp/aom_dsp_common.h"
21 #include "aom_dsp/blend.h"
22 #include "aom_mem/aom_mem.h"
23 #include "aom_ports/aom_timer.h"
24 #include "aom_ports/mem.h"
25 #include "aom_ports/system_state.h"
26
27 #include "av1/common/av1_common_int.h"
28 #include "av1/common/cfl.h"
29 #include "av1/common/blockd.h"
30 #include "av1/common/common.h"
31 #include "av1/common/common_data.h"
32 #include "av1/common/entropy.h"
33 #include "av1/common/entropymode.h"
34 #include "av1/common/idct.h"
35 #include "av1/common/mvref_common.h"
36 #include "av1/common/obmc.h"
37 #include "av1/common/pred_common.h"
38 #include "av1/common/quant_common.h"
39 #include "av1/common/reconinter.h"
40 #include "av1/common/reconintra.h"
41 #include "av1/common/scan.h"
42 #include "av1/common/seg_common.h"
43 #include "av1/common/txb_common.h"
44 #include "av1/common/warped_motion.h"
45
46 #include "av1/encoder/aq_variance.h"
47 #include "av1/encoder/av1_quantize.h"
48 #include "av1/encoder/cost.h"
49 #include "av1/encoder/compound_type.h"
50 #include "av1/encoder/encodemb.h"
51 #include "av1/encoder/encodemv.h"
52 #include "av1/encoder/encoder.h"
53 #include "av1/encoder/encodetxb.h"
54 #include "av1/encoder/hybrid_fwd_txfm.h"
55 #include "av1/encoder/interp_search.h"
56 #include "av1/encoder/intra_mode_search.h"
57 #include "av1/encoder/intra_mode_search_utils.h"
58 #include "av1/encoder/mcomp.h"
59 #include "av1/encoder/ml.h"
60 #include "av1/encoder/mode_prune_model_weights.h"
61 #include "av1/encoder/model_rd.h"
62 #include "av1/encoder/motion_search_facade.h"
63 #include "av1/encoder/palette.h"
64 #include "av1/encoder/pustats.h"
65 #include "av1/encoder/random.h"
66 #include "av1/encoder/ratectrl.h"
67 #include "av1/encoder/rd.h"
68 #include "av1/encoder/rdopt.h"
69 #include "av1/encoder/reconinter_enc.h"
70 #include "av1/encoder/tokenize.h"
71 #include "av1/encoder/tpl_model.h"
72 #include "av1/encoder/tx_search.h"
73
74 #define LAST_NEW_MV_INDEX 6
75
// Mode-threshold multiplication factor table for
// prune_inter_modes_if_skippable, indexed by qindex. The values are kept in
// Q12 format and the equation used to derive them is
// (2.5 - ((float)x->qindex / MAXQ) * 1.5), i.e. the factor decays linearly
// from 2.5 (qindex 0) down to 1.0 (qindex MAXQ).
#define MODE_THRESH_QBITS 12
static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
  10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
  9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
  9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
  9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
  9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
  8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
  8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
  8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
  8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
  7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
  7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
  7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
  7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
  6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
  6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
  6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
  5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
  5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
  5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
  5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
  4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
  4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
  4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
  4144,  4120,  4096
};
106
// Default ordering of the prediction-mode candidates: single-reference modes
// grouped by mode type (NEAREST, NEW, NEAR, GLOBAL across all references),
// then the compound NEAREST_NEAREST modes, then the remaining compound modes
// grouped by reference pair, and finally the intra modes.
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
  // Single-reference inter modes, one group per prediction mode.
  THR_NEARESTMV,
  THR_NEARESTL2,
  THR_NEARESTL3,
  THR_NEARESTB,
  THR_NEARESTA2,
  THR_NEARESTA,
  THR_NEARESTG,

  THR_NEWMV,
  THR_NEWL2,
  THR_NEWL3,
  THR_NEWB,
  THR_NEWA2,
  THR_NEWA,
  THR_NEWG,

  THR_NEARMV,
  THR_NEARL2,
  THR_NEARL3,
  THR_NEARB,
  THR_NEARA2,
  THR_NEARA,
  THR_NEARG,

  THR_GLOBALMV,
  THR_GLOBALL2,
  THR_GLOBALL3,
  THR_GLOBALB,
  THR_GLOBALA2,
  THR_GLOBALA,
  THR_GLOBALG,

  // Compound NEAREST_NEAREST modes for every reference pair.
  THR_COMP_NEAREST_NEARESTLA,
  THR_COMP_NEAREST_NEARESTL2A,
  THR_COMP_NEAREST_NEARESTL3A,
  THR_COMP_NEAREST_NEARESTGA,
  THR_COMP_NEAREST_NEARESTLB,
  THR_COMP_NEAREST_NEARESTL2B,
  THR_COMP_NEAREST_NEARESTL3B,
  THR_COMP_NEAREST_NEARESTGB,
  THR_COMP_NEAREST_NEARESTLA2,
  THR_COMP_NEAREST_NEARESTL2A2,
  THR_COMP_NEAREST_NEARESTL3A2,
  THR_COMP_NEAREST_NEARESTGA2,
  THR_COMP_NEAREST_NEARESTLL2,
  THR_COMP_NEAREST_NEARESTLL3,
  THR_COMP_NEAREST_NEARESTLG,
  THR_COMP_NEAREST_NEARESTBA,

  // Remaining compound modes, one group per reference pair.
  THR_COMP_NEAR_NEARLB,
  THR_COMP_NEW_NEWLB,
  THR_COMP_NEW_NEARESTLB,
  THR_COMP_NEAREST_NEWLB,
  THR_COMP_NEW_NEARLB,
  THR_COMP_NEAR_NEWLB,
  THR_COMP_GLOBAL_GLOBALLB,

  THR_COMP_NEAR_NEARLA,
  THR_COMP_NEW_NEWLA,
  THR_COMP_NEW_NEARESTLA,
  THR_COMP_NEAREST_NEWLA,
  THR_COMP_NEW_NEARLA,
  THR_COMP_NEAR_NEWLA,
  THR_COMP_GLOBAL_GLOBALLA,

  THR_COMP_NEAR_NEARL2A,
  THR_COMP_NEW_NEWL2A,
  THR_COMP_NEW_NEARESTL2A,
  THR_COMP_NEAREST_NEWL2A,
  THR_COMP_NEW_NEARL2A,
  THR_COMP_NEAR_NEWL2A,
  THR_COMP_GLOBAL_GLOBALL2A,

  THR_COMP_NEAR_NEARL3A,
  THR_COMP_NEW_NEWL3A,
  THR_COMP_NEW_NEARESTL3A,
  THR_COMP_NEAREST_NEWL3A,
  THR_COMP_NEW_NEARL3A,
  THR_COMP_NEAR_NEWL3A,
  THR_COMP_GLOBAL_GLOBALL3A,

  THR_COMP_NEAR_NEARGA,
  THR_COMP_NEW_NEWGA,
  THR_COMP_NEW_NEARESTGA,
  THR_COMP_NEAREST_NEWGA,
  THR_COMP_NEW_NEARGA,
  THR_COMP_NEAR_NEWGA,
  THR_COMP_GLOBAL_GLOBALGA,

  THR_COMP_NEAR_NEARL2B,
  THR_COMP_NEW_NEWL2B,
  THR_COMP_NEW_NEARESTL2B,
  THR_COMP_NEAREST_NEWL2B,
  THR_COMP_NEW_NEARL2B,
  THR_COMP_NEAR_NEWL2B,
  THR_COMP_GLOBAL_GLOBALL2B,

  THR_COMP_NEAR_NEARL3B,
  THR_COMP_NEW_NEWL3B,
  THR_COMP_NEW_NEARESTL3B,
  THR_COMP_NEAREST_NEWL3B,
  THR_COMP_NEW_NEARL3B,
  THR_COMP_NEAR_NEWL3B,
  THR_COMP_GLOBAL_GLOBALL3B,

  THR_COMP_NEAR_NEARGB,
  THR_COMP_NEW_NEWGB,
  THR_COMP_NEW_NEARESTGB,
  THR_COMP_NEAREST_NEWGB,
  THR_COMP_NEW_NEARGB,
  THR_COMP_NEAR_NEWGB,
  THR_COMP_GLOBAL_GLOBALGB,

  THR_COMP_NEAR_NEARLA2,
  THR_COMP_NEW_NEWLA2,
  THR_COMP_NEW_NEARESTLA2,
  THR_COMP_NEAREST_NEWLA2,
  THR_COMP_NEW_NEARLA2,
  THR_COMP_NEAR_NEWLA2,
  THR_COMP_GLOBAL_GLOBALLA2,

  THR_COMP_NEAR_NEARL2A2,
  THR_COMP_NEW_NEWL2A2,
  THR_COMP_NEW_NEARESTL2A2,
  THR_COMP_NEAREST_NEWL2A2,
  THR_COMP_NEW_NEARL2A2,
  THR_COMP_NEAR_NEWL2A2,
  THR_COMP_GLOBAL_GLOBALL2A2,

  THR_COMP_NEAR_NEARL3A2,
  THR_COMP_NEW_NEWL3A2,
  THR_COMP_NEW_NEARESTL3A2,
  THR_COMP_NEAREST_NEWL3A2,
  THR_COMP_NEW_NEARL3A2,
  THR_COMP_NEAR_NEWL3A2,
  THR_COMP_GLOBAL_GLOBALL3A2,

  THR_COMP_NEAR_NEARGA2,
  THR_COMP_NEW_NEWGA2,
  THR_COMP_NEW_NEARESTGA2,
  THR_COMP_NEAREST_NEWGA2,
  THR_COMP_NEW_NEARGA2,
  THR_COMP_NEAR_NEWGA2,
  THR_COMP_GLOBAL_GLOBALGA2,

  THR_COMP_NEAR_NEARLL2,
  THR_COMP_NEW_NEWLL2,
  THR_COMP_NEW_NEARESTLL2,
  THR_COMP_NEAREST_NEWLL2,
  THR_COMP_NEW_NEARLL2,
  THR_COMP_NEAR_NEWLL2,
  THR_COMP_GLOBAL_GLOBALLL2,

  THR_COMP_NEAR_NEARLL3,
  THR_COMP_NEW_NEWLL3,
  THR_COMP_NEW_NEARESTLL3,
  THR_COMP_NEAREST_NEWLL3,
  THR_COMP_NEW_NEARLL3,
  THR_COMP_NEAR_NEWLL3,
  THR_COMP_GLOBAL_GLOBALLL3,

  THR_COMP_NEAR_NEARLG,
  THR_COMP_NEW_NEWLG,
  THR_COMP_NEW_NEARESTLG,
  THR_COMP_NEAREST_NEWLG,
  THR_COMP_NEW_NEARLG,
  THR_COMP_NEAR_NEWLG,
  THR_COMP_GLOBAL_GLOBALLG,

  THR_COMP_NEAR_NEARBA,
  THR_COMP_NEW_NEWBA,
  THR_COMP_NEW_NEARESTBA,
  THR_COMP_NEAREST_NEWBA,
  THR_COMP_NEW_NEARBA,
  THR_COMP_NEAR_NEWBA,
  THR_COMP_GLOBAL_GLOBALBA,

  // Intra modes come last.
  THR_DC,
  THR_PAETH,
  THR_SMOOTH,
  THR_SMOOTH_V,
  THR_SMOOTH_H,
  THR_H_PRED,
  THR_V_PRED,
  THR_D135_PRED,
  THR_D203_PRED,
  THR_D157_PRED,
  THR_D67_PRED,
  THR_D113_PRED,
  THR_D45_PRED,
};
299
300 /*!\cond */
// One single-reference inter-mode search result (rd cost and the reference
// frame it was measured on).
typedef struct SingleInterModeState {
  int64_t rd;                    // rd cost recorded for this entry.
  MV_REFERENCE_FRAME ref_frame;  // Reference frame used for this entry.
  int valid;                     // Nonzero once this entry has been filled in.
} SingleInterModeState;
306
// Aggregated state carried through the inter-mode rd search of one block:
// the best candidate found so far plus caches of per-mode / per-reference
// intermediate results used to prune later candidates.
typedef struct InterModeSearchState {
  int64_t best_rd;            // Best overall rd cost found so far.
  int64_t best_skip_rd[2];    // Best skip-path rd costs (two categories;
                              // indexing convention defined by the callers).
  MB_MODE_INFO best_mbmode;   // Mode info of the current best candidate.
  int best_rate_y;            // Luma rate of the best candidate.
  int best_rate_uv;           // Chroma rate of the best candidate.
  int best_mode_skippable;    // Whether the best candidate can skip txfm.
  int best_skip2;
  THR_MODES best_mode_index;  // Index of the best mode in the mode order.
  int num_available_refs;
  int64_t dist_refs[REF_FRAMES];
  int dist_order_refs[REF_FRAMES];
  int64_t mode_threshold[MAX_MODES];
  int64_t best_intra_rd;      // Best rd among intra candidates.
  unsigned int best_pred_sse;

  /*!
   * \brief Keep track of best intra rd for use in compound mode.
   */
  int64_t best_pred_rd[REFERENCE_MODES];
  int64_t best_pred_diff[REFERENCE_MODES];
  // Save a set of single_newmv for each checked ref_mv.
  int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  // The rd of simple translation in single inter modes
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  int64_t best_single_rd[REF_FRAMES];    // Best rd per single reference.
  PREDICTION_MODE best_single_mode[REF_FRAMES];  // Best mode per reference.

  // Single search results by [directions][modes][reference frames]
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
                                            [FWD_REFS];
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  IntraModeSearchState intra_search_state;  // State of the intra-mode search.
  RD_STATS best_y_rdcost;                   // Luma rd stats of best candidate.
} InterModeSearchState;
348 /*!\endcond */
349
// Resets the inter-mode rd model statistics of every block size in this
// tile so model fitting starts from a clean slate.
void av1_inter_mode_data_init(TileDataEnc *tile_data) {
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    InterModeRdModel *model = &tile_data->inter_mode_rd_models[bsize];
    model->ready = 0;
    // Clear the sample accumulators used by av1_inter_mode_data_fit().
    model->num = 0;
    model->dist_sum = 0;
    model->ld_sum = 0;
    model->sse_sum = 0;
    model->sse_sse_sum = 0;
    model->sse_ld_sum = 0;
  }
}
362
// Estimates the residue rate and distortion of a block with the given sse,
// using the per-block-size linear model (ld = a * sse + b) fitted in
// av1_inter_mode_data_fit(). Returns 1 and fills *est_residue_cost /
// *est_dist when the model is ready; returns 0 otherwise.
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
                             int64_t sse, int *est_residue_cost,
                             int64_t *est_dist) {
  aom_clear_system_state();
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
  if (md->ready) {
    if (sse < md->dist_mean) {
      // sse below the mean distortion: predict a free (zero-rate) residue.
      *est_residue_cost = 0;
      *est_dist = sse;
    } else {
      *est_dist = (int64_t)round(md->dist_mean);
      const double est_ld = md->a * sse + md->b;
      // Clamp estimated rate cost by INT_MAX / 2.
      // TODO(angiebird@google.com): find better solution than clamping.
      if (fabs(est_ld) < 1e-2) {
        // Near-zero slope would blow up the division below.
        *est_residue_cost = INT_MAX / 2;
      } else {
        double est_residue_cost_dbl = ((sse - md->dist_mean) / est_ld);
        if (est_residue_cost_dbl < 0) {
          *est_residue_cost = 0;
        } else {
          *est_residue_cost =
              (int)AOMMIN((int64_t)round(est_residue_cost_dbl), INT_MAX / 2);
        }
      }
      if (*est_residue_cost <= 0) {
        // Degenerate estimate: fall back to zero rate / sse distortion.
        *est_residue_cost = 0;
        *est_dist = sse;
      }
    }
    return 1;
  }
  return 0;
}
397
// (Re)fits the per-block-size linear model ld = a * sse + b consumed by
// get_est_rate_dist(), from the samples pushed via inter_mode_data_push().
// A model is only (re)fitted once enough samples have accumulated (200
// before the first fit, 64 for refits); once ready, new batch means are
// blended with the running means using a 3:1 weight. The sample
// accumulators are cleared after every fit. rdmult is currently unused.
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
  aom_clear_system_state();
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    const int block_idx = inter_mode_data_block_idx(bsize);
    InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
    // Block sizes without a model slot are skipped.
    if (block_idx == -1) continue;
    if ((md->ready == 0 && md->num < 200) || (md->ready == 1 && md->num < 64)) {
      continue;
    } else {
      if (md->ready == 0) {
        // First fit: initialize the running means from this batch.
        md->dist_mean = md->dist_sum / md->num;
        md->ld_mean = md->ld_sum / md->num;
        md->sse_mean = md->sse_sum / md->num;
        md->sse_sse_mean = md->sse_sse_sum / md->num;
        md->sse_ld_mean = md->sse_ld_sum / md->num;
      } else {
        // Refit: exponentially blend old means with the new batch (3:1).
        const double factor = 3;
        md->dist_mean =
            (md->dist_mean * factor + (md->dist_sum / md->num)) / (factor + 1);
        md->ld_mean =
            (md->ld_mean * factor + (md->ld_sum / md->num)) / (factor + 1);
        md->sse_mean =
            (md->sse_mean * factor + (md->sse_sum / md->num)) / (factor + 1);
        md->sse_sse_mean =
            (md->sse_sse_mean * factor + (md->sse_sse_sum / md->num)) /
            (factor + 1);
        md->sse_ld_mean =
            (md->sse_ld_mean * factor + (md->sse_ld_sum / md->num)) /
            (factor + 1);
      }

      // Least-squares slope/intercept of ld as a function of sse.
      const double my = md->ld_mean;
      const double mx = md->sse_mean;
      const double dx = sqrt(md->sse_sse_mean);
      const double dxy = md->sse_ld_mean;

      md->a = (dxy - mx * my) / (dx * dx - mx * mx);
      md->b = my - md->a * mx;
      md->ready = 1;

      // Restart sample collection for the next (re)fit.
      md->num = 0;
      md->dist_sum = 0;
      md->ld_sum = 0;
      md->sse_sum = 0;
      md->sse_sse_sum = 0;
      md->sse_ld_sum = 0;
    }
    (void)rdmult;
  }
}
448
inter_mode_data_push(TileDataEnc * tile_data,BLOCK_SIZE bsize,int64_t sse,int64_t dist,int residue_cost)449 static AOM_INLINE void inter_mode_data_push(TileDataEnc *tile_data,
450 BLOCK_SIZE bsize, int64_t sse,
451 int64_t dist, int residue_cost) {
452 if (residue_cost == 0 || sse == dist) return;
453 const int block_idx = inter_mode_data_block_idx(bsize);
454 if (block_idx == -1) return;
455 InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
456 if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
457 aom_clear_system_state();
458 const double ld = (sse - dist) * 1. / residue_cost;
459 ++rd_model->num;
460 rd_model->dist_sum += dist;
461 rd_model->ld_sum += ld;
462 rd_model->sse_sum += sse;
463 rd_model->sse_sse_sum += (double)sse * (double)sse;
464 rd_model->sse_ld_sum += sse * ld;
465 }
466 }
467
inter_modes_info_push(InterModesInfo * inter_modes_info,int mode_rate,int64_t sse,int64_t rd,RD_STATS * rd_cost,RD_STATS * rd_cost_y,RD_STATS * rd_cost_uv,const MB_MODE_INFO * mbmi)468 static AOM_INLINE void inter_modes_info_push(InterModesInfo *inter_modes_info,
469 int mode_rate, int64_t sse,
470 int64_t rd, RD_STATS *rd_cost,
471 RD_STATS *rd_cost_y,
472 RD_STATS *rd_cost_uv,
473 const MB_MODE_INFO *mbmi) {
474 const int num = inter_modes_info->num;
475 assert(num < MAX_INTER_MODES);
476 inter_modes_info->mbmi_arr[num] = *mbmi;
477 inter_modes_info->mode_rate_arr[num] = mode_rate;
478 inter_modes_info->sse_arr[num] = sse;
479 inter_modes_info->est_rd_arr[num] = rd;
480 inter_modes_info->rd_cost_arr[num] = *rd_cost;
481 inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
482 inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
483 ++inter_modes_info->num;
484 }
485
compare_rd_idx_pair(const void * a,const void * b)486 static int compare_rd_idx_pair(const void *a, const void *b) {
487 if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
488 return 0;
489 } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
490 return 1;
491 } else {
492 return -1;
493 }
494 }
495
inter_modes_info_sort(const InterModesInfo * inter_modes_info,RdIdxPair * rd_idx_pair_arr)496 static AOM_INLINE void inter_modes_info_sort(
497 const InterModesInfo *inter_modes_info, RdIdxPair *rd_idx_pair_arr) {
498 if (inter_modes_info->num == 0) {
499 return;
500 }
501 for (int i = 0; i < inter_modes_info->num; ++i) {
502 rd_idx_pair_arr[i].idx = i;
503 rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
504 }
505 qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
506 compare_rd_idx_pair);
507 }
508
509 // Similar to get_horver_correlation, but also takes into account first
510 // row/column, when computing horizontal/vertical correlation.
av1_get_horver_correlation_full_c(const int16_t * diff,int stride,int width,int height,float * hcorr,float * vcorr)511 void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
512 int width, int height, float *hcorr,
513 float *vcorr) {
514 // The following notation is used:
515 // x - current pixel
516 // y - left neighbor pixel
517 // z - top neighbor pixel
518 int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
519 int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
520 int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
521
522 // First, process horizontal correlation on just the first row
523 x_sum += diff[0];
524 x2_sum += diff[0] * diff[0];
525 x_firstrow += diff[0];
526 x2_firstrow += diff[0] * diff[0];
527 for (int j = 1; j < width; ++j) {
528 const int16_t x = diff[j];
529 const int16_t y = diff[j - 1];
530 x_sum += x;
531 x_firstrow += x;
532 x2_sum += x * x;
533 x2_firstrow += x * x;
534 xy_sum += x * y;
535 }
536
537 // Process vertical correlation in the first column
538 x_firstcol += diff[0];
539 x2_firstcol += diff[0] * diff[0];
540 for (int i = 1; i < height; ++i) {
541 const int16_t x = diff[i * stride];
542 const int16_t z = diff[(i - 1) * stride];
543 x_sum += x;
544 x_firstcol += x;
545 x2_sum += x * x;
546 x2_firstcol += x * x;
547 xz_sum += x * z;
548 }
549
550 // Now process horiz and vert correlation through the rest unit
551 for (int i = 1; i < height; ++i) {
552 for (int j = 1; j < width; ++j) {
553 const int16_t x = diff[i * stride + j];
554 const int16_t y = diff[i * stride + j - 1];
555 const int16_t z = diff[(i - 1) * stride + j];
556 x_sum += x;
557 x2_sum += x * x;
558 xy_sum += x * y;
559 xz_sum += x * z;
560 }
561 }
562
563 for (int j = 0; j < width; ++j) {
564 x_finalrow += diff[(height - 1) * stride + j];
565 x2_finalrow +=
566 diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
567 }
568 for (int i = 0; i < height; ++i) {
569 x_finalcol += diff[i * stride + width - 1];
570 x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
571 }
572
573 int64_t xhor_sum = x_sum - x_finalcol;
574 int64_t xver_sum = x_sum - x_finalrow;
575 int64_t y_sum = x_sum - x_firstcol;
576 int64_t z_sum = x_sum - x_firstrow;
577 int64_t x2hor_sum = x2_sum - x2_finalcol;
578 int64_t x2ver_sum = x2_sum - x2_finalrow;
579 int64_t y2_sum = x2_sum - x2_firstcol;
580 int64_t z2_sum = x2_sum - x2_firstrow;
581
582 const float num_hor = (float)(height * (width - 1));
583 const float num_ver = (float)((height - 1) * width);
584
585 const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
586 const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
587
588 const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
589 const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
590
591 const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
592 const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
593
594 if (xhor_var_n > 0 && y_var_n > 0) {
595 *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
596 *hcorr = *hcorr < 0 ? 0 : *hcorr;
597 } else {
598 *hcorr = 1.0;
599 }
600 if (xver_var_n > 0 && z_var_n > 0) {
601 *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
602 *vcorr = *vcorr < 0 ? 0 : *vcorr;
603 } else {
604 *vcorr = 1.0;
605 }
606 }
607
get_sse(const AV1_COMP * cpi,const MACROBLOCK * x,int64_t * sse_y)608 static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
609 int64_t *sse_y) {
610 const AV1_COMMON *cm = &cpi->common;
611 const int num_planes = av1_num_planes(cm);
612 const MACROBLOCKD *xd = &x->e_mbd;
613 const MB_MODE_INFO *mbmi = xd->mi[0];
614 int64_t total_sse = 0;
615 for (int plane = 0; plane < num_planes; ++plane) {
616 if (plane && !xd->is_chroma_ref) break;
617 const struct macroblock_plane *const p = &x->plane[plane];
618 const struct macroblockd_plane *const pd = &xd->plane[plane];
619 const BLOCK_SIZE bs =
620 get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
621 unsigned int sse;
622
623 cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
624 &sse);
625 total_sse += sse;
626 if (!plane && sse_y) *sse_y = sse;
627 }
628 total_sse <<= 4;
629 return total_sse;
630 }
631
// Computes the sum of squared differences between coeff and dqcoeff, and
// writes the sum of squared source coefficients to *ssz.
// Fix: the products are now computed in 64-bit arithmetic. The original
// multiplied in plain int (diff * diff, coeff[i] * coeff[i]), which can
// overflow — undefined behavior — for large (e.g. high bit-depth)
// coefficient values; the highbd variant below already used int64 products.
// Results are unchanged whenever the original did not overflow.
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t error = 0, sqcoeff = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}
646
// Low-precision block error: sum of squared differences between the int16
// coeff and dqcoeff arrays.
// Fix: accumulate the product in 64-bit arithmetic. diff lies in
// [-65535, 65535], so diff * diff in plain int can exceed INT_MAX —
// signed overflow is undefined behavior. Results are unchanged whenever
// the original did not overflow.
int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             intptr_t block_size) {
  int64_t error = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += (int64_t)diff * diff;
  }

  return error;
}
658
659 #if CONFIG_AV1_HIGHBITDEPTH
// High bit-depth block error: the squared-error and squared-coefficient
// sums are computed in 64-bit and then scaled back to 8-bit-equivalent
// precision, with rounding, based on the bit depth bd.
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  int64_t error = 0, sqcoeff = 0;
  const int shift = 2 * (bd - 8);
  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);

  *ssz = (sqcoeff + rounding) >> shift;
  return (error + rounding) >> shift;
}
680 #endif
681
// Decides whether an oblique intra mode can be skipped given the best intra
// mode found so far: each diagonal mode is only worth evaluating when one
// of its two neighboring directions is the current best. Returns 1 to skip.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D113_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D67_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D203_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D157_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default: return 0;
  }
}
698
// Returns the cost of signalling the given inter prediction mode under the
// supplied mode context. Compound modes have a dedicated table; single
// modes accumulate the cost of the NEWMV / GLOBALMV / REFMV decision tree.
static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
                       int16_t mode_context) {
  if (is_inter_compound_mode(mode)) {
    return mode_costs
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
  }

  assert(is_inter_mode(mode));

  // Level 0: NEWMV vs everything else.
  int16_t ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) return mode_costs->newmv_mode_cost[ctx][0];
  int cost = mode_costs->newmv_mode_cost[ctx][1];

  // Level 1: GLOBALMV vs the reference-mv modes.
  ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  if (mode == GLOBALMV) return cost + mode_costs->zeromv_mode_cost[ctx][0];
  cost += mode_costs->zeromv_mode_cost[ctx][1];

  // Level 2: NEARESTMV vs NEARMV.
  ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
  cost += mode_costs->refmv_mode_cost[ctx][mode != NEARESTMV];
  return cost;
}
729
get_single_mode(PREDICTION_MODE this_mode,int ref_idx)730 static INLINE PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
731 int ref_idx) {
732 return ref_idx ? compound_ref1_mode(this_mode)
733 : compound_ref0_mode(this_mode);
734 }
735
// Fills ref_costs_single[ref] and ref_costs_comp[ref0][ref1] with the cost
// of signalling each possible reference-frame choice under the current
// entropy context. If the segment fixes the reference frame, all costs are
// zero. Single-ref costs follow the bitstream's binary decision tree
// (forward/backward at level 0, then progressively narrower splits);
// compound costs combine a first-ref and second-ref cost, with dedicated
// handling for unidirectional (same-side) pairs.
static AOM_INLINE void estimate_ref_frame_costs(
    const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
    int segment_id, unsigned int *ref_costs_single,
    unsigned int (*ref_costs_comp)[REF_FRAMES]) {
  int seg_ref_active =
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    // The segment dictates the reference frame: nothing to signal.
    memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
    int ref_frame;
    for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
      memset(ref_costs_comp[ref_frame], 0,
             REF_FRAMES * sizeof((*ref_costs_comp)[0]));
  } else {
    int intra_inter_ctx = av1_get_intra_inter_context(xd);
    ref_costs_single[INTRA_FRAME] =
        mode_costs->intra_inter_cost[intra_inter_ctx][0];
    // All inter references start with the cost of signalling "inter".
    unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];

    for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
      ref_costs_single[i] = base_cost;

    const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
    const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
    const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
    const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
    const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
    const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);

    // Determine cost of a single ref frame, where frame types are represented
    // by a tree:
    // Level 0: add cost whether this ref is a forward or backward ref
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];

    // Level 1: if this ref is forward ref,
    // add cost whether it is last/last2 or last3/golden
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];

    // Level 1: if this ref is backward ref
    // then add cost whether this ref is altref or backward ref
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];

    // Level 2: further add cost whether this ref is last or last2
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];

    // Level 2: last3 or golden
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];

    // Level 2: bwdref or altref2
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p6][5][1];

    if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
      // Similar to single ref, determine cost of compound ref frames.
      // cost_compound_refs = cost_first_ref + cost_second_ref
      const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
      const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
      const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
      const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
      const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);

      const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
      unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };

      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
              base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
      ref_bicomp_costs[ALTREF_FRAME] = 0;

      // cost of first ref frame
      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];

      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];

      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];

      // cost of second ref frame
      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];

      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];

      // cost: if one ref frame is forward ref, the other ref is backward ref
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
          ref_costs_comp[ref0][ref1] =
              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
        }
      }

      // cost: if both ref frames are the same side.
      const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
      const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
      const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
    } else {
      // Compound prediction is disabled for this frame: assign a flat
      // placeholder cost to every compound pair.
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
          ref_costs_comp[ref0][ref1] = 512;
      }
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
    }
  }
}
897
store_coding_context(MACROBLOCK * x,PICK_MODE_CONTEXT * ctx,int mode_index,int64_t comp_pred_diff[REFERENCE_MODES],int skippable)898 static AOM_INLINE void store_coding_context(
899 #if CONFIG_INTERNAL_STATS
900 MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
901 #else
902 MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
903 #endif // CONFIG_INTERNAL_STATS
904 int64_t comp_pred_diff[REFERENCE_MODES], int skippable) {
905 MACROBLOCKD *const xd = &x->e_mbd;
906
907 // Take a snapshot of the coding context so it can be
908 // restored if we decide to encode this way
909 ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
910 ctx->skippable = skippable;
911 #if CONFIG_INTERNAL_STATS
912 ctx->best_mode_index = mode_index;
913 #endif // CONFIG_INTERNAL_STATS
914 ctx->mic = *xd->mi[0];
915 av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
916 av1_ref_frame_type(xd->mi[0]->ref_frame));
917 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
918 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
919 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
920 }
921
setup_buffer_ref_mvs_inter(const AV1_COMP * const cpi,MACROBLOCK * x,MV_REFERENCE_FRAME ref_frame,BLOCK_SIZE block_size,struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE])922 static AOM_INLINE void setup_buffer_ref_mvs_inter(
923 const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
924 BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
925 const AV1_COMMON *cm = &cpi->common;
926 const int num_planes = av1_num_planes(cm);
927 const YV12_BUFFER_CONFIG *scaled_ref_frame =
928 av1_get_scaled_ref_frame(cpi, ref_frame);
929 MACROBLOCKD *const xd = &x->e_mbd;
930 MB_MODE_INFO *const mbmi = xd->mi[0];
931 MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
932 const struct scale_factors *const sf =
933 get_ref_scale_factors_const(cm, ref_frame);
934 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
935 assert(yv12 != NULL);
936
937 if (scaled_ref_frame) {
938 // Setup pred block based on scaled reference, because av1_mv_pred() doesn't
939 // support scaling.
940 av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
941 num_planes);
942 } else {
943 av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
944 }
945
946 // Gets an initial list of candidate vectors from neighbours and orders them
947 av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
948 xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
949 mbmi_ext->mode_context);
950 // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
951 // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
952 av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
953 // Further refinement that is encode side only to test the top few candidates
954 // in full and choose the best as the center point for subsequent searches.
955 // The current implementation doesn't support scaling.
956 av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
957 ref_frame, block_size);
958
959 // Go back to unscaled reference.
960 if (scaled_ref_frame) {
961 // We had temporarily setup pred block based on scaled reference above. Go
962 // back to unscaled reference now, for subsequent use.
963 av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
964 }
965 }
966
967 #define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
968 #define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
969
970 // TODO(jingning): this mv clamping function should be block size dependent.
clamp_mv2(MV * mv,const MACROBLOCKD * xd)971 static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
972 const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
973 xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
974 xd->mb_to_top_edge - LEFT_TOP_MARGIN,
975 xd->mb_to_bottom_edge +
976 RIGHT_BOTTOM_MARGIN };
977 clamp_mv(mv, &mv_limits);
978 }
979
980 /* If the current mode shares the same mv with other modes with higher cost,
981 * skip this mode. */
/* If the current mode shares the same mv with other modes with higher cost,
 * skip this mode.
 * Only applies to single-reference NEARMV/GLOBALMV: depending on the ref MV
 * stack size and the global motion type, these can be guaranteed to produce
 * the same MV as an already-searched mode (NEARESTMV/GLOBALMV/NEARMV).
 * Returns 1 (and copies the compare mode's modelled RD over) when the mode
 * can be skipped, 0 otherwise. */
static int skip_repeated_mv(const AV1_COMMON *const cm,
                            const MACROBLOCK *const x,
                            PREDICTION_MODE this_mode,
                            const MV_REFERENCE_FRAME ref_frames[2],
                            InterModeSearchState *search_state) {
  const int is_comp_pred = ref_frames[1] > INTRA_FRAME;
  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  PREDICTION_MODE compare_mode = MB_MODE_COUNT;
  if (!is_comp_pred) {
    if (this_mode == NEARMV) {
      if (ref_mv_count == 0) {
        // NEARMV has the same motion vector as NEARESTMV
        compare_mode = NEARESTMV;
      }
      if (ref_mv_count == 1 &&
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
        // NEARMV has the same motion vector as GLOBALMV
        compare_mode = GLOBALMV;
      }
    }
    if (this_mode == GLOBALMV) {
      if (ref_mv_count == 0 &&
          cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
        // GLOBALMV has the same motion vector as NEARESTMV
        compare_mode = NEARESTMV;
      }
      if (ref_mv_count == 1) {
        // GLOBALMV has the same motion vector as NEARMV
        compare_mode = NEARMV;
      }
    }

    if (compare_mode != MB_MODE_COUNT) {
      // Use modelled_rd to check whether compare mode was searched
      if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] !=
          INT64_MAX) {
        const int16_t mode_ctx =
            av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
        const int compare_cost =
            cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
        const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);

        // Only skip if the mode cost is larger than compare mode cost
        if (this_cost > compare_cost) {
          search_state->modelled_rd[this_mode][0][ref_frames[0]] =
              search_state->modelled_rd[compare_mode][0][ref_frames[0]];
          return 1;
        }
      }
    }
  }
  return 0;
}
1037
clamp_and_check_mv(int_mv * out_mv,int_mv in_mv,const AV1_COMMON * cm,const MACROBLOCK * x)1038 static INLINE int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
1039 const AV1_COMMON *cm,
1040 const MACROBLOCK *x) {
1041 const MACROBLOCKD *const xd = &x->e_mbd;
1042 *out_mv = in_mv;
1043 lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
1044 cm->features.cur_frame_force_integer_mv);
1045 clamp_mv2(&out_mv->as_mv, xd);
1046 return av1_is_fullmv_in_range(&x->mv_limits,
1047 get_fullmv_from_mv(&out_mv->as_mv));
1048 }
1049
1050 // To use single newmv directly for compound modes, need to clamp the mv to the
1051 // valid mv range. Without this, encoder would generate out of range mv, and
1052 // this is seen in 8k encoding.
clamp_mv_in_range(MACROBLOCK * const x,int_mv * mv,int ref_idx)1053 static INLINE void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
1054 int ref_idx) {
1055 const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
1056 SubpelMvLimits mv_limits;
1057
1058 av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
1059 clamp_mv(&mv->as_mv, &mv_limits);
1060 }
1061
// Determine the NEWMV motion vector(s) for the current mode.
// For compound modes, previously found single-reference NEWMV results (cached
// in `args`) are reused and clamped; only their rate is recomputed. For the
// single-reference case a motion search is run (optionally with a reduced
// search range derived from earlier ref_mv_idx results) and the result is
// cached back into `args`.
// Outputs: cur_mv (selected MVs) and *rate_mv (their signaling cost).
// Returns INT64_MAX if the single-ref search produced an invalid MV (caller
// must skip this mode), 0 on success.
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
                            int *const rate_mv, HandleInterModeArgs *const args,
                            inter_mode_info *mode_info) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  const int ref_mv_idx = mbmi->ref_mv_idx;

  if (is_comp_pred) {
    const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
    const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
    if (this_mode == NEW_NEWMV) {
      // Both MVs are NEWMV: seed each from the cached single-ref result.
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      *rate_mv = 0;
      for (int i = 0; i < 2; ++i) {
        const int_mv ref_mv = av1_get_ref_mv(x, i);
        *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
                                    x->mv_costs->nmv_joint_cost,
                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
      }
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      // Only the second MV is NEWMV.
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 1);
      *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    } else {
      // Only the first MV is NEWMV.
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 0);
      *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    }
  } else {
    // Single ref case.
    const int ref_idx = 0;
    int search_range = INT_MAX;

    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
      // Speed feature: bound the search range using the closest previously
      // searched ref MV (ref_mv_idx < current) and its NEWMV result.
      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
      int min_mv_diff = INT_MAX;
      int best_match = -1;
      MV prev_ref_mv[2] = { { 0 } };
      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
                                                     idx, &x->mbmi_ext)
                               .as_mv;
        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
                                       abs(ref_mv.col - prev_ref_mv[idx].col));

        if (min_mv_diff > ref_mv_diff) {
          min_mv_diff = ref_mv_diff;
          best_match = idx;
        }
      }

      if (min_mv_diff < (16 << 3)) {
        if (args->single_newmv_valid[best_match][refs[0]]) {
          search_range = min_mv_diff;
          search_range +=
              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
                         prev_ref_mv[best_match].row),
                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
                         prev_ref_mv[best_match].col));
          // Get full pixel search range.
          search_range = (search_range + 4) >> 3;
        }
      }
    }

    int_mv best_mv;
    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
                             mode_info, &best_mv);
    if (best_mv.as_int == INVALID_MV) return INT64_MAX;

    // Cache the result so later compound modes can reuse it.
    args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
    cur_mv[0].as_int = best_mv.as_int;
  }

  return 0;
}
1163
update_mode_start_end_index(const AV1_COMP * const cpi,int * mode_index_start,int * mode_index_end,int last_motion_mode_allowed,int interintra_allowed,int eval_motion_mode)1164 static INLINE void update_mode_start_end_index(const AV1_COMP *const cpi,
1165 int *mode_index_start,
1166 int *mode_index_end,
1167 int last_motion_mode_allowed,
1168 int interintra_allowed,
1169 int eval_motion_mode) {
1170 *mode_index_start = (int)SIMPLE_TRANSLATION;
1171 *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
1172 if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
1173 if (!eval_motion_mode) {
1174 *mode_index_end = (int)SIMPLE_TRANSLATION;
1175 } else {
1176 // Set the start index appropriately to process motion modes other than
1177 // simple translation
1178 *mode_index_start = 1;
1179 }
1180 }
1181 }
1182
1183 /*!\brief AV1 motion mode search
1184 *
1185 * \ingroup inter_mode_search
1186 * Function to search over and determine the motion mode. It will update
1187 * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
1188 * WARPED_CAUSAL and determine any necessary side information for the selected
1189 * motion mode. It will also perform the full transform search, unless the
1190 * input parameter do_tx_search indicates to do an estimation of the RD rather
1191 * than an RD corresponding to a full transform search. It will return the
1192 * RD for the final motion_mode.
1193 * Do the RD search for a given inter mode and compute all information relevant
1194 * to the input mode. It will compute the best MV,
1195 * compound parameters (if the mode is a compound mode) and interpolation filter
1196 * parameters.
1197 *
1198 * \param[in] cpi Top-level encoder structure.
1199 * \param[in] tile_data Pointer to struct holding adaptive
1200 * data/contexts/models for the tile during
1201 * encoding.
1202 * \param[in] x Pointer to struct holding all the data for
1203 * the current macroblock.
1204 * \param[in] bsize Current block size.
1205 * \param[in,out] rd_stats Struct to keep track of the overall RD
1206 * information.
1207 * \param[in,out] rd_stats_y Struct to keep track of the RD information
1208 * for only the Y plane.
1209 * \param[in,out] rd_stats_uv Struct to keep track of the RD information
1210 * for only the UV planes.
1211 * \param[in] args HandleInterModeArgs struct holding
1212 * miscellaneous arguments for inter mode
1213 * search. See the documentation for this
1214 * struct for a description of each member.
1215 * \param[in] ref_best_rd Best RD found so far for this block.
1216 * It is used for early termination of this
1217 * search if the RD exceeds this value.
1218 * \param[in,out] ref_skip_rd A length 2 array, where skip_rd[0] is the
1219 * best total RD for a skip mode so far, and
1220 * skip_rd[1] is the best RD for a skip mode so
1221 * far in luma. This is used as a speed feature
1222 * to skip the transform search if the computed
1223 * skip RD for the current mode is not better
1224 * than the best skip_rd so far.
1225 * \param[in,out] rate_mv The rate associated with the motion vectors.
1226 * This will be modified if a motion search is
1227 * done in the motion mode search.
1228 * \param[in,out] orig_dst A prediction buffer to hold a computed
1229 * prediction. This will eventually hold the
1230 * final prediction, and the tmp_dst info will
1231 * be copied here.
1232 * \param[in,out] best_est_rd Estimated RD for motion mode search if
1233 * do_tx_search (see below) is 0.
1234 * \param[in] do_tx_search Parameter to indicate whether or not to do
1235 * a full transform search. This will compute
1236 * an estimated RD for the modes without the
1237 * transform search and later perform the full
1238 * transform search on the best candidates.
1239 * \param[in] inter_modes_info InterModesInfo struct to hold inter mode
1240 * information to perform a full transform
1241 * search only on winning candidates searched
1242 * with an estimate for transform coding RD.
 * \param[in]   eval_motion_mode  Boolean whether or not to evaluate
 *                                motion modes other than SIMPLE_TRANSLATION.
1245 * \param[out] yrd Stores the rdcost corresponding to encoding
1246 * the luma plane.
1247 * \return Returns INT64_MAX if the determined motion mode is invalid and the
1248 * current motion mode being tested should be skipped. It returns 0 if the
1249 * motion mode search is a success.
1250 */
static int64_t motion_mode_rd(
    const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
    int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
    int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
    int eval_motion_mode, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  const int rate2_nocoeff = rd_stats->rate;
  int best_xskip_txfm = 0;
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int rate_mv0 = *rate_mv;
  const int interintra_allowed = cm->seq_params.enable_interintra_compound &&
                                 is_interintra_allowed(mbmi) &&
                                 mbmi->compound_idx;
  WARP_SAMPLE_INFO *const warp_sample_info =
      &x->warp_sample_info[mbmi->ref_frame[0]];
  int *pts0 = warp_sample_info->pts;
  int *pts_inref0 = warp_sample_info->pts_inref;

  assert(mbmi->ref_frame[1] != INTRA_FRAME);
  const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
  av1_invalid_rd_stats(&best_rd_stats);
  aom_clear_system_state();
  mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
  MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
  *yrd = INT64_MAX;
  if (features->switchable_motion_mode) {
    // Determine which motion modes to search if more than SIMPLE_TRANSLATION
    // is allowed.
    last_motion_mode_allowed = motion_mode_allowed(
        xd->global_motion, xd, mbmi, features->allow_warped_motion);
  }

  if (last_motion_mode_allowed == WARPED_CAUSAL) {
    // Collect projection samples used in least squares approximation of
    // the warped motion parameters if WARPED_CAUSAL is going to be searched.
    if (warp_sample_info->num < 0) {
      warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
    }
    mbmi->num_proj_ref = warp_sample_info->num;
  }
  const int total_samples = mbmi->num_proj_ref;
  if (total_samples == 0) {
    // Do not search WARPED_CAUSAL if there are no samples to use to determine
    // warped parameters.
    last_motion_mode_allowed = OBMC_CAUSAL;
  }

  const MB_MODE_INFO base_mbmi = *mbmi;
  MB_MODE_INFO best_mbmi;
  const int interp_filter = features->interp_filter;
  const int switchable_rate =
      av1_is_interp_needed(xd)
          ? av1_get_switchable_rate(x, xd, interp_filter,
                                    cm->seq_params.enable_dual_filter)
          : 0;
  int64_t best_rd = INT64_MAX;
  int best_rate_mv = rate_mv0;
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  int mode_index_start, mode_index_end;
  // Modify the start and end index according to speed features. For example,
  // if SIMPLE_TRANSLATION has already been searched according to
  // the motion_mode_for_winner_cand speed feature, update the mode_index_start
  // to avoid searching it again.
  update_mode_start_end_index(cpi, &mode_index_start, &mode_index_end,
                              last_motion_mode_allowed, interintra_allowed,
                              eval_motion_mode);
  // Main function loop. This loops over all of the possible motion modes and
  // computes RD to determine the best one. This process includes computing
  // any necessary side information for the motion mode and performing the
  // transform search.
  for (int mode_index = mode_index_start; mode_index <= mode_index_end;
       mode_index++) {
    if (args->skip_motion_mode && mode_index) continue;
    int tmp_rate2 = rate2_nocoeff;
    const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
    int tmp_rate_mv = rate_mv0;

    *mbmi = base_mbmi;
    if (is_interintra_mode) {
      // Only use SIMPLE_TRANSLATION for interintra
      mbmi->motion_mode = SIMPLE_TRANSLATION;
    } else {
      mbmi->motion_mode = (MOTION_MODE)mode_index;
      assert(mbmi->ref_frame[1] != INTRA_FRAME);
    }

    // Do not search OBMC if the probability of selecting it is below a
    // predetermined threshold for this update_type and block size.
    const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
    const int prune_obmc = cpi->frame_probs.obmc_probs[update_type][bsize] <
                           cpi->sf.inter_sf.prune_obmc_prob_thresh;
    if ((!cpi->oxcf.motion_mode_cfg.enable_obmc ||
         cpi->sf.rt_sf.use_nonrd_pick_mode || prune_obmc) &&
        mbmi->motion_mode == OBMC_CAUSAL)
      continue;

    if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
      // SIMPLE_TRANSLATION mode: no need to recalculate.
      // The prediction is calculated before motion_mode_rd() is called in
      // handle_inter_mode()
    } else if (mbmi->motion_mode == OBMC_CAUSAL) {
      const uint32_t cur_mv = mbmi->mv[0].as_int;
      // OBMC_CAUSAL not allowed for compound prediction
      assert(!is_comp_pred);
      if (have_newmv_in_inter_mode(this_mode)) {
        av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
                                 &mbmi->mv[0]);
        tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
      }
      if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
        // Build the predictor according to the current motion vector if it has
        // not already been built
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                      0, av1_num_planes(cm) - 1);
      }
      // Build the inter predictor by blending the predictor corresponding to
      // this MV, and the neighboring blocks using the OBMC model
      av1_build_obmc_inter_prediction(
          cm, xd, args->above_pred_buf, args->above_pred_stride,
          args->left_pred_buf, args->left_pred_stride);
#if !CONFIG_REALTIME_ONLY
    } else if (mbmi->motion_mode == WARPED_CAUSAL) {
      int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
      mbmi->motion_mode = WARPED_CAUSAL;
      mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
      mbmi->interp_filters =
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));

      memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
      memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
      // Select the samples according to motion vector difference
      if (mbmi->num_proj_ref > 1) {
        mbmi->num_proj_ref = av1_selectSamples(
            &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
      }

      // Compute the warped motion parameters with a least squares fit
      // using the collected samples
      if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
                               mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
                               &mbmi->wm_params, mi_row, mi_col)) {
        assert(!is_comp_pred);
        if (have_newmv_in_inter_mode(this_mode)) {
          // Refine MV for NEWMV mode
          const int_mv mv0 = mbmi->mv[0];
          const WarpedMotionParams wm_params0 = mbmi->wm_params;
          const int num_proj_ref0 = mbmi->num_proj_ref;

          const int_mv ref_mv = av1_get_ref_mv(x, 0);
          SUBPEL_MOTION_SEARCH_PARAMS ms_params;
          av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
                                            &ref_mv.as_mv, NULL);

          // Refine MV in a small range.
          av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
                               total_samples);

          if (mv0.as_int != mbmi->mv[0].as_int) {
            // Keep the refined MV and WM parameters.
            tmp_rate_mv = av1_mv_bit_cost(
                &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
                x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
            tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
          } else {
            // Restore the old MV and WM parameters.
            mbmi->mv[0] = mv0;
            mbmi->wm_params = wm_params0;
            mbmi->num_proj_ref = num_proj_ref0;
          }
        }

        // Build the warped predictor
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                      av1_num_planes(cm) - 1);
      } else {
        continue;
      }
#endif  // !CONFIG_REALTIME_ONLY
    } else if (is_interintra_mode) {
      const int ret =
          av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
                                      &tmp_rate_mv, &tmp_rate2, orig_dst);
      if (ret < 0) continue;
    }

    // If we are searching newmv and the mv is the same as refmv, skip the
    // current mode
    if (!av1_check_newmv_joint_nonzero(cm, x)) continue;

    // Update rd_stats for the current motion mode
    txfm_info->skip_txfm = 0;
    rd_stats->dist = 0;
    rd_stats->sse = 0;
    rd_stats->skip_txfm = 1;
    rd_stats->rate = tmp_rate2;
    const ModeCosts *mode_costs = &x->mode_costs;
    if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
    if (interintra_allowed) {
      rd_stats->rate +=
          mode_costs->interintra_cost[size_group_lookup[bsize]]
                                     [mbmi->ref_frame[1] == INTRA_FRAME];
    }
    if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
        (mbmi->ref_frame[1] != INTRA_FRAME)) {
      if (last_motion_mode_allowed == WARPED_CAUSAL) {
        rd_stats->rate +=
            mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
      } else {
        rd_stats->rate +=
            mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
      }
    }

    int64_t this_yrd = INT64_MAX;

    if (!do_tx_search) {
      // Avoid doing a transform search here to speed up the overall mode
      // search. It will be done later in the mode search if the current
      // motion mode seems promising.
      int64_t curr_sse = -1;
      int64_t sse_y = -1;
      int est_residue_cost = 0;
      int64_t est_dist = 0;
      int64_t est_rd = 0;
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        curr_sse = get_sse(cpi, x, &sse_y);
        const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
                                                 &est_residue_cost, &est_dist);
        (void)has_est_rd;
        assert(has_est_rd);
      } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
                 cpi->sf.rt_sf.use_nonrd_pick_mode) {
        model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
            cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
            NULL, &curr_sse, NULL, NULL, NULL);
        sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
      }
      est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
      if (est_rd * 0.80 > *best_est_rd) {
        mbmi->ref_frame[1] = ref_frame_1;
        continue;
      }
      const int mode_rate = rd_stats->rate;
      rd_stats->rate += est_residue_cost;
      rd_stats->dist = est_dist;
      rd_stats->rdcost = est_rd;
      if (rd_stats->rdcost < *best_est_rd) {
        *best_est_rd = rd_stats->rdcost;
        assert(sse_y >= 0);
        ref_skip_rd[1] = cpi->sf.inter_sf.txfm_rd_gate_level
                             ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
                             : INT64_MAX;
      }
      if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
        if (!is_comp_pred) {
          assert(curr_sse >= 0);
          inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                                rd_stats->rdcost, rd_stats, rd_stats_y,
                                rd_stats_uv, mbmi);
        }
      } else {
        assert(curr_sse >= 0);
        inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                              rd_stats->rdcost, rd_stats, rd_stats_y,
                              rd_stats_uv, mbmi);
      }
      mbmi->skip_txfm = 0;
    } else {
      // Perform full transform search
      int64_t skip_rd = INT64_MAX;
      int64_t skip_rdy = INT64_MAX;
      if (cpi->sf.inter_sf.txfm_rd_gate_level) {
        // Check if the mode is good enough based on skip RD
        int64_t sse_y = INT64_MAX;
        int64_t curr_sse = get_sse(cpi, x, &sse_y);
        skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
        skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
        int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
                                        cpi->sf.inter_sf.txfm_rd_gate_level, 0);
        if (!eval_txfm) continue;
      }

      // Do transform search
      const int mode_rate = rd_stats->rate;
      if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
                           rd_stats->rate, ref_best_rd)) {
        if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
          return INT64_MAX;
        }
        continue;
      }
      const int skip_ctx = av1_get_skip_txfm_context(xd);
      const int y_rate =
          rd_stats->skip_txfm
              ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);

      const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      if (curr_rd < ref_best_rd) {
        ref_best_rd = curr_rd;
        ref_skip_rd[0] = skip_rd;
        ref_skip_rd[1] = skip_rdy;
      }
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
            rd_stats_y->rate + rd_stats_uv->rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }

    if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
      if (is_nontrans_global_motion(xd, xd->mi[0])) {
        mbmi->interp_filters =
            av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
      }
    }

    const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
    if (mode_index == 0) {
      args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
    }
    if (mode_index == 0 || tmp_rd < best_rd) {
      // Update best_rd data if this is the best motion mode so far
      best_mbmi = *mbmi;
      best_rd = tmp_rd;
      best_rd_stats = *rd_stats;
      best_rd_stats_y = *rd_stats_y;
      best_rate_mv = tmp_rate_mv;
      *yrd = this_yrd;
      if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
      memcpy(best_blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
      best_xskip_txfm = mbmi->skip_txfm;
    }
  }
  // Update RD and mbmi stats for selected motion mode
  mbmi->ref_frame[1] = ref_frame_1;
  *rate_mv = best_rate_mv;
  if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
    av1_invalid_rd_stats(rd_stats);
    restore_dst_buf(xd, *orig_dst, num_planes);
    return INT64_MAX;
  }
  *mbmi = best_mbmi;
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
  txfm_info->skip_txfm = best_xskip_txfm;

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1621
// Estimate the RD cost of coding this block in skip mode: build the inter
// prediction for all coded planes, use the summed squared prediction error
// as both distortion and SSE, and charge only the skip-mode flag as rate.
// Results are written into rd_stats; always returns 0.
static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
                            MACROBLOCK *const x, BLOCK_SIZE bsize,
                            const BUFFER_SET *const orig_dst) {
  assert(bsize < BLOCK_SIZES_ALL);
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  av1_enc_build_inter_predictor(cm, xd, xd->mi_row, xd->mi_col, orig_dst,
                                bsize, 0, av1_num_planes(cm) - 1);

  // Accumulate the prediction SSE over all coded planes.
  int64_t sse_sum = 0;
  for (int plane_idx = 0; plane_idx < num_planes; ++plane_idx) {
    const struct macroblock_plane *const mb_plane = &x->plane[plane_idx];
    const struct macroblockd_plane *const mbd_plane = &xd->plane[plane_idx];
    const BLOCK_SIZE plane_bsize = get_plane_block_size(
        bsize, mbd_plane->subsampling_x, mbd_plane->subsampling_y);
    const int plane_w = block_size_wide[plane_bsize];
    const int plane_h = block_size_high[plane_bsize];

    av1_subtract_plane(x, plane_bsize, plane_idx);
    // Scale the pixel-domain SSE by 16 to match the precision used by the
    // rest of the RD code.
    sse_sum += (int64_t)(aom_sum_squares_2d_i16(mb_plane->src_diff, plane_w,
                                                plane_w, plane_h)
                         << 4);
  }

  rd_stats->dist = rd_stats->sse = sse_sum;
  rd_stats->rate =
      x->mode_costs.skip_mode_cost[av1_get_skip_mode_context(xd)][1];
  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1655
1656 // Check NEARESTMV, NEARMV, GLOBALMV ref mvs for duplicate and skip the relevant
1657 // mode
check_repeat_ref_mv(const MB_MODE_INFO_EXT * mbmi_ext,int ref_idx,const MV_REFERENCE_FRAME * ref_frame,PREDICTION_MODE single_mode)1658 static INLINE int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
1659 int ref_idx,
1660 const MV_REFERENCE_FRAME *ref_frame,
1661 PREDICTION_MODE single_mode) {
1662 const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1663 const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1664 assert(single_mode != NEWMV);
1665 if (single_mode == NEARESTMV) {
1666 return 0;
1667 } else if (single_mode == NEARMV) {
1668 // when ref_mv_count = 0, NEARESTMV and NEARMV are same as GLOBALMV
1669 // when ref_mv_count = 1, NEARMV is same as GLOBALMV
1670 if (ref_mv_count < 2) return 1;
1671 } else if (single_mode == GLOBALMV) {
1672 // when ref_mv_count == 0, GLOBALMV is same as NEARESTMV
1673 if (ref_mv_count == 0) return 1;
1674 // when ref_mv_count == 1, NEARMV is same as GLOBALMV
1675 else if (ref_mv_count == 1)
1676 return 0;
1677
1678 int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
1679 // Check GLOBALMV is matching with any mv in ref_mv_stack
1680 for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
1681 int_mv this_mv;
1682
1683 if (ref_idx == 0)
1684 this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
1685 else
1686 this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
1687
1688 if (this_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
1689 return 1;
1690 }
1691 }
1692 return 0;
1693 }
1694
// Fetch into *this_mv the motion vector implied by the |ref_idx| component
// of |this_mode| (resolved to a single-reference mode). NEWMV components
// are set to INVALID_MV since the actual mv comes from motion search later.
// Returns 0 when skip_repeated_ref_mv is set and check_repeat_ref_mv()
// determines this mv duplicates another mode's mv; returns 1 otherwise.
static INLINE int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
                              int ref_idx, int ref_mv_idx,
                              int skip_repeated_ref_mv,
                              const MV_REFERENCE_FRAME *ref_frame,
                              const MB_MODE_INFO_EXT *mbmi_ext) {
  const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
  assert(is_inter_singleref_mode(single_mode));
  if (single_mode == NEWMV) {
    // Placeholder; the mv is produced later by motion search.
    this_mv->as_int = INVALID_MV;
  } else if (single_mode == GLOBALMV) {
    if (skip_repeated_ref_mv &&
        check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
      return 0;
    *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
  } else {
    assert(single_mode == NEARMV || single_mode == NEARESTMV);
    const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
    // NEARESTMV reads stack entry 0; NEARMV reads entry ref_mv_idx + 1.
    const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
    if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
      assert(ref_mv_offset >= 0);
      if (ref_idx == 0) {
        *this_mv =
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
      } else {
        *this_mv =
            mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
      }
    } else {
      // Not enough stack entries: fall back to the global mv (subject to
      // the same duplicate check).
      if (skip_repeated_ref_mv &&
          check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
        return 0;
      *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
    }
  }
  return 1;
}
1731
// Populate cur_mv[] (one entry per reference, two for compound modes) with
// the motion vectors implied by |this_mode|. Non-NEWMV components are
// clamped/validated via clamp_and_check_mv(); NEWMV components are taken
// directly from the ref-mv stack entry for mbmi->ref_mv_idx. Returns 0 when
// any component should be skipped (duplicate mv, or failed clamp check),
// 1 on success.
static INLINE int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
                               const AV1_COMMON *cm, const MACROBLOCK *x,
                               int skip_repeated_ref_mv) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);

  int ret = 1;
  for (int i = 0; i < is_comp_pred + 1; ++i) {
    int_mv this_mv;
    this_mv.as_int = INVALID_MV;
    // get_this_mv() returns 0 when this component duplicates another mode.
    ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
                      skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
    if (!ret) return 0;
    const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
    if (single_mode == NEWMV) {
      // For NEWMV, use the corresponding stack entry directly (this_mv is
      // INVALID_MV in this case).
      const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
      cur_mv[i] =
          (i == 0) ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                         .this_mv
                   : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
                         .comp_mv;
    } else {
      // Clamp to the valid range; a failed check invalidates the mode.
      ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
    }
  }
  return ret;
}
1761
get_drl_cost(const MB_MODE_INFO * mbmi,const MB_MODE_INFO_EXT * mbmi_ext,const int (* const drl_mode_cost0)[2],int8_t ref_frame_type)1762 static INLINE int get_drl_cost(const MB_MODE_INFO *mbmi,
1763 const MB_MODE_INFO_EXT *mbmi_ext,
1764 const int (*const drl_mode_cost0)[2],
1765 int8_t ref_frame_type) {
1766 int cost = 0;
1767 if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
1768 for (int idx = 0; idx < 2; ++idx) {
1769 if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1770 uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1771 cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
1772 if (mbmi->ref_mv_idx == idx) return cost;
1773 }
1774 }
1775 return cost;
1776 }
1777
1778 if (have_nearmv_in_inter_mode(mbmi->mode)) {
1779 for (int idx = 1; idx < 3; ++idx) {
1780 if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1781 uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1782 cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
1783 if (mbmi->ref_mv_idx == (idx - 1)) return cost;
1784 }
1785 }
1786 return cost;
1787 }
1788 return cost;
1789 }
1790
is_single_newmv_valid(const HandleInterModeArgs * const args,const MB_MODE_INFO * const mbmi,PREDICTION_MODE this_mode)1791 static INLINE int is_single_newmv_valid(const HandleInterModeArgs *const args,
1792 const MB_MODE_INFO *const mbmi,
1793 PREDICTION_MODE this_mode) {
1794 for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
1795 const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1796 const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
1797 if (single_mode == NEWMV &&
1798 args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
1799 return 0;
1800 }
1801 }
1802 return 1;
1803 }
1804
// Number of ref_mv_idx values (DRL entries) to search for |mode| with the
// given reference-frame combination. Returns 1 when the mode carries no
// DRL signalling or the stack is too small.
static int get_drl_refmv_count(const MACROBLOCK *const x,
                               const MV_REFERENCE_FRAME *ref_frame,
                               PREDICTION_MODE mode) {
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  if (have_nearmv_in_inter_mode(mode)) {
    // NEAR modes do not use stack entry 0, so DRL applies only when the
    // stack holds more than two candidates.
    if (ref_mv_count <= 2) return 1;
    return AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - 1);
  }
  if ((mode == NEWMV || mode == NEW_NEWMV) && ref_mv_count > 1) {
    return AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count);
  }
  return 1;
}
1820
1821 // Checks if particular ref_mv_idx should be pruned.
prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,const int qindex,const int ref_mv_idx)1822 static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
1823 const int qindex,
1824 const int ref_mv_idx) {
1825 if (reduce_inter_modes >= 3) return 1;
1826 // Q-index logic based pruning is enabled only for
1827 // reduce_inter_modes = 2.
1828 assert(reduce_inter_modes == 2);
1829 // When reduce_inter_modes=2, pruning happens as below based on q index.
1830 // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
1831 // For q index range between 86 and 170: prune if ref_mv_idx == 2.
1832 // For q index range between 171 and 255: no pruning.
1833 const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
1834 return (ref_mv_idx >= min_prune_ref_mv_idx);
1835 }
1836
// Whether this reference motion vector can be skipped, based on initial
// heuristics: low-weight candidates for distant (LAST2/LAST3) references,
// q-index based pruning for non-nearest single-ref NEWMV modes, invalid
// single NEWMV components, and an estimated-rate RD bound against
// ref_best_rd. Side effect: sets mbmi->ref_mv_idx. Returns true to skip.
static bool ref_mv_idx_early_breakout(
    const SPEED_FEATURES *const sf,
    const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
    const HandleInterModeArgs *const args, int64_t ref_best_rd,
    int ref_mv_idx) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const int is_comp_pred = has_second_ref(mbmi);
  if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
    // Prune low-weight candidates when either reference is LAST2/LAST3.
    if (mbmi->ref_frame[0] == LAST2_FRAME ||
        mbmi->ref_frame[0] == LAST3_FRAME ||
        mbmi->ref_frame[1] == LAST2_FRAME ||
        mbmi->ref_frame[1] == LAST3_FRAME) {
      const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
      if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
          REF_CAT_LEVEL) {
        return true;
      }
    }
    // TODO(any): Experiment with reduce_inter_modes for compound prediction
    if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
        have_newmv_in_inter_mode(mbmi->mode)) {
      // Only prune references that are not the temporally nearest ones.
      if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
          mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
        const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
        const int do_prune = prune_ref_mv_idx_using_qindex(
            sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
        if (do_prune &&
            (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
             REF_CAT_LEVEL)) {
          return true;
        }
      }
    }
  }

  mbmi->ref_mv_idx = ref_mv_idx;
  // Compound modes need valid single-ref NEWMV results for each component.
  if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
    return true;
  }
  // Lower bound on the rate: reference cost + single/comp cost + DRL cost.
  size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost = get_drl_cost(
      mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
  est_rd_rate += drl_cost;
  // Skip when even the zero-distortion RD bound exceeds the best so far
  // (NEAREST-type modes are always kept).
  if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
      mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
    return true;
  }
  return false;
}
1891
// Compute the estimated RD cost for the motion vector with simple translation.
// Forces SIMPLE_TRANSLATION with COMPOUND_AVERAGE (for compound modes),
// builds only the luma prediction, and estimates rate/distortion with the
// curve-fit model rather than full transform search. Also records the DRL
// cost into mode_info[ref_mv_idx]. Returns INT64_MAX when the mode can be
// rejected early (invalid mv, or rate-only RD already above ref_best_rd).
static int64_t simple_translation_pred_rd(
    AV1_COMP *const cpi, MACROBLOCK *x, RD_STATS *rd_stats,
    HandleInterModeArgs *args, int ref_mv_idx, inter_mode_info *mode_info,
    int64_t ref_best_rd, BLOCK_SIZE bsize) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const AV1_COMMON *cm = &cpi->common;
  const int is_comp_pred = has_second_ref(mbmi);
  const ModeCosts *mode_costs = &x->mode_costs;

  // Snapshot the current destination buffers so the prediction is built
  // into the existing dst planes.
  struct macroblockd_plane *p = xd->plane;
  const BUFFER_SET orig_dst = {
    { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
    { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
  };
  av1_init_rd_stats(rd_stats);

  // Restrict the candidate to plain averaged compound prediction.
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->comp_group_idx = 0;
  mbmi->compound_idx = 1;
  if (mbmi->ref_frame[1] == INTRA_FRAME) {
    mbmi->ref_frame[1] = NONE_FRAME;
  }
  int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);

  mbmi->num_proj_ref = 0;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->ref_mv_idx = ref_mv_idx;

  // Rate so far: reference signalling + DRL index.
  rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost =
      get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
  rd_stats->rate += drl_cost;
  mode_info[ref_mv_idx].drl_cost = drl_cost;

  int_mv cur_mv[2];
  if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
    return INT64_MAX;
  }
  assert(have_nearmv_in_inter_mode(mbmi->mode));
  for (int i = 0; i < is_comp_pred + 1; ++i) {
    mbmi->mv[i].as_int = cur_mv[i].as_int;
  }
  const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
  rd_stats->rate += ref_mv_cost;

  // Early exit: rate alone already exceeds the best RD found so far.
  if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd) {
    return INT64_MAX;
  }

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->num_proj_ref = 0;
  if (is_comp_pred) {
    // Only compound_average
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
  }
  set_default_interp_filters(mbmi, cm->features.interp_filter);

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  // Build luma only; the model estimate below uses the luma plane.
  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
                                AOM_PLANE_Y, AOM_PLANE_Y);
  int est_rate;
  int64_t est_dist;
  model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
                                  NULL, NULL, NULL, NULL, NULL);
  return RDCOST(x->rdmult, rd_stats->rate + est_rate, est_dist);
}
1966
// A small bit-set over the integers 0..(bits in int)-1, stored in an int:
// bit i set to 1 means integer i is a member of the set, 0 means it is not.

// Add |index| to the set held in *mask.
static inline void mask_set_bit(int *mask, int index) {
  *mask |= (1 << index);
}
1971
// Query whether |index| is a member of the set held in |mask|.
static inline bool mask_check_bit(int mask, int index) {
  return ((mask >> index) & 1) != 0;
}
1975
// Before performing the full MV search in handle_inter_mode, do a simple
// translation search and see if we can eliminate any motion vectors.
// Returns a bit mask where, if the i-th bit is set, the i-th ref_mv_idx
// should be searched. The simple-translation pruning stage only runs for
// NEARMV-class modes on blocks larger than 64 pels with the corresponding
// speed feature enabled; otherwise only the early-breakout filter applies.
static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
                                RD_STATS *rd_stats,
                                HandleInterModeArgs *const args,
                                int64_t ref_best_rd, inter_mode_info *mode_info,
                                BLOCK_SIZE bsize, const int ref_set) {
  AV1_COMMON *const cm = &cpi->common;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const PREDICTION_MODE this_mode = mbmi->mode;

  // Only search indices if they have some chance of being good.
  int good_indices = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args,
                                  ref_best_rd, i)) {
      continue;
    }
    mask_set_bit(&good_indices, i);
  }

  // Only prune in NEARMV mode, if the speed feature is set, and the block size
  // is large enough. If these conditions are not met, return all good indices
  // found so far.
  if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
    return good_indices;
  if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
  if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
  // Do not prune when there is internal resizing. TODO(elliottk) fix this
  // so b/2384 can be resolved.
  if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
      (mbmi->ref_frame[1] > 0 &&
       av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
    return good_indices;
  }

  // Calculate the RD cost for the motion vectors using simple translation.
  int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    // If this index is bad, ignore it.
    if (!mask_check_bit(good_indices, ref_mv_idx)) {
      continue;
    }
    idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
        cpi, x, rd_stats, args, ref_mv_idx, mode_info, ref_best_rd, bsize);
  }
  // Find the index with the best RD cost.
  int best_idx = 0;
  for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) {
    if (idx_rdcost[i] < idx_rdcost[best_idx]) {
      best_idx = i;
    }
  }
  // Only include indices that are good and within a % of the best.
  // Compound prediction uses a looser ratio than single prediction.
  const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
  // If the simple translation cost is not within this multiple of the
  // best RD, skip it. Note that the cutoff is derived experimentally.
  const double ref_dth = 5;
  int result = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (mask_check_bit(good_indices, i) &&
        (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth &&
        (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) {
      mask_set_bit(&result, i);
    }
  }
  return result;
}
2047
/*!\brief Motion mode information for inter mode search speedup.
 *
 * Used in a speed feature to search motion modes other than
 * SIMPLE_TRANSLATION only on winning candidates.
 */
typedef struct motion_mode_candidate {
  /*!
   * Full mode info snapshot for the motion mode candidate.
   */
  MB_MODE_INFO mbmi;
  /*!
   * Rate describing the cost of the motion vectors for this candidate.
   */
  int rate_mv;
  /*!
   * Rate before motion mode search and transform coding is applied.
   */
  int rate2_nocoeff;
  /*!
   * An integer value 0 or 1 which indicates whether or not to skip the motion
   * mode search and default to SIMPLE_TRANSLATION as a speed feature for this
   * candidate.
   */
  int skip_motion_mode;
  /*!
   * Total RD cost for this candidate, used to rank winners.
   */
  int64_t rd_cost;
} motion_mode_candidate;
2077
/*!\cond */
// List of the best motion-mode candidates retained from the
// SIMPLE_TRANSLATION pass for a later, restricted motion-mode search.
typedef struct motion_mode_best_st_candidate {
  // Stored winner candidates.
  motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
  // Number of valid entries in motion_mode_cand.
  int num_motion_mode_cand;
} motion_mode_best_st_candidate;
2083
2084 // Checks if the current reference frame matches with neighbouring block's
2085 // (top/left) reference frames
ref_match_found_in_nb_blocks(MB_MODE_INFO * cur_mbmi,MB_MODE_INFO * nb_mbmi)2086 static AOM_INLINE int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
2087 MB_MODE_INFO *nb_mbmi) {
2088 MV_REFERENCE_FRAME nb_ref_frames[2] = { nb_mbmi->ref_frame[0],
2089 nb_mbmi->ref_frame[1] };
2090 MV_REFERENCE_FRAME cur_ref_frames[2] = { cur_mbmi->ref_frame[0],
2091 cur_mbmi->ref_frame[1] };
2092 const int is_cur_comp_pred = has_second_ref(cur_mbmi);
2093 int match_found = 0;
2094
2095 for (int i = 0; i < (is_cur_comp_pred + 1); i++) {
2096 if ((cur_ref_frames[i] == nb_ref_frames[0]) ||
2097 (cur_ref_frames[i] == nb_ref_frames[1]))
2098 match_found = 1;
2099 }
2100 return match_found;
2101 }
2102
find_ref_match_in_above_nbs(const int total_mi_cols,MACROBLOCKD * xd)2103 static AOM_INLINE int find_ref_match_in_above_nbs(const int total_mi_cols,
2104 MACROBLOCKD *xd) {
2105 if (!xd->up_available) return 1;
2106 const int mi_col = xd->mi_col;
2107 MB_MODE_INFO **cur_mbmi = xd->mi;
2108 // prev_row_mi points into the mi array, starting at the beginning of the
2109 // previous row.
2110 MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
2111 const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
2112 uint8_t mi_step;
2113 for (int above_mi_col = mi_col; above_mi_col < end_col;
2114 above_mi_col += mi_step) {
2115 MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
2116 mi_step = mi_size_wide[above_mi[0]->bsize];
2117 int match_found = 0;
2118 if (is_inter_block(*above_mi))
2119 match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi);
2120 if (match_found) return 1;
2121 }
2122 return 0;
2123 }
2124
find_ref_match_in_left_nbs(const int total_mi_rows,MACROBLOCKD * xd)2125 static AOM_INLINE int find_ref_match_in_left_nbs(const int total_mi_rows,
2126 MACROBLOCKD *xd) {
2127 if (!xd->left_available) return 1;
2128 const int mi_row = xd->mi_row;
2129 MB_MODE_INFO **cur_mbmi = xd->mi;
2130 // prev_col_mi points into the mi array, starting at the top of the
2131 // previous column
2132 MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
2133 const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
2134 uint8_t mi_step;
2135 for (int left_mi_row = mi_row; left_mi_row < end_row;
2136 left_mi_row += mi_step) {
2137 MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
2138 mi_step = mi_size_high[left_mi[0]->bsize];
2139 int match_found = 0;
2140 if (is_inter_block(*left_mi))
2141 match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi);
2142 if (match_found) return 1;
2143 }
2144 return 0;
2145 }
2146 /*!\endcond */
2147
/*! \brief Struct used to hold TPL data to
 * narrow down parts of the inter mode search.
 */
typedef struct {
  /*!
   * The best (minimum) inter cost out of all of the reference frames.
   */
  int64_t best_inter_cost;
  /*!
   * The accumulated inter cost for each reference frame, indexed by
   * MV_REFERENCE_FRAME - 1 (LAST_FRAME maps to index 0). A value of 0
   * indicates an invalid/unused reference.
   */
  int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
} PruneInfoFromTpl;
2161
#if !CONFIG_REALTIME_ONLY
// TODO(Remya): Check if get_tpl_stats_b() can be reused
// Accumulate per-reference TPL prediction errors over the TPL units covered
// by the current block, and record the smallest valid one as
// best_inter_cost. Leaves inter_cost_info_from_tpl untouched when no valid
// TPL frame exists for the current GF-group index.
static AOM_INLINE void get_block_level_tpl_stats(
    AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
    PruneInfoFromTpl *inter_cost_info_from_tpl) {
  const GF_GROUP *const gf_group = &cpi->gf_group;
  AV1_COMMON *const cm = &cpi->common;

  assert(IMPLIES(gf_group->size > 0, gf_group->index < gf_group->size));
  const int tpl_idx = gf_group->index;
  TplParams *const tpl_data = &cpi->tpl_data;
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
  if (tpl_idx >= MAX_TPL_FRAME_IDX || !tpl_frame->is_valid) {
    return;
  }

  const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];
  const int tpl_stride = tpl_frame->stride;
  // TPL stats are stored on a coarser grid of 2^tpl_stats_block_mis_log2 mis.
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
  // Columns must be converted to superres coordinates since TPL stats are
  // collected at the superres-upscaled resolution.
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);

  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
       row += row_step) {
    for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
         col += col_step_sr) {
      const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
          row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];

      // Sums up the inter cost of corresponding ref frames
      for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
            this_stats->pred_error[ref_idx];
      }
    }
  }

  // Computes the best inter cost (minimum inter_cost)
  int64_t best_inter_cost = INT64_MAX;
  for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
    const int64_t cur_inter_cost =
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
    // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
    // calculating the minimum inter_cost
    if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
        valid_refs[ref_idx])
      best_inter_cost = cur_inter_cost;
  }
  inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
}
#endif
2221
prune_modes_based_on_tpl_stats(PruneInfoFromTpl * inter_cost_info_from_tpl,const int * refs,int ref_mv_idx,const PREDICTION_MODE this_mode,int prune_mode_level)2222 static AOM_INLINE int prune_modes_based_on_tpl_stats(
2223 PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
2224 const PREDICTION_MODE this_mode, int prune_mode_level) {
2225 const int have_newmv = have_newmv_in_inter_mode(this_mode);
2226 if ((prune_mode_level < 2) && have_newmv) return 0;
2227
2228 const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
2229 if (best_inter_cost == INT64_MAX) return 0;
2230
2231 const int prune_level = prune_mode_level - 1;
2232 int64_t cur_inter_cost;
2233
2234 const int is_globalmv =
2235 (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
2236 const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;
2237
2238 // Thresholds used for pruning:
2239 // Lower value indicates aggressive pruning and higher value indicates
2240 // conservative pruning which is set based on ref_mv_idx and speed feature.
2241 // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2. prune_index
2242 // 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV
2243 static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
2244 { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
2245 };
2246
2247 const int is_comp_pred = (refs[1] > INTRA_FRAME);
2248 if (!is_comp_pred) {
2249 cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2250 } else {
2251 const int64_t inter_cost_ref0 =
2252 inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2253 const int64_t inter_cost_ref1 =
2254 inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
2255 // Choose maximum inter_cost among inter_cost_ref0 and inter_cost_ref1 for
2256 // more aggressive pruning
2257 cur_inter_cost = AOMMAX(inter_cost_ref0, inter_cost_ref1);
2258 }
2259
2260 // Prune the mode if cur_inter_cost is greater than threshold times
2261 // best_inter_cost
2262 if (cur_inter_cost >
2263 ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
2264 best_inter_cost) >>
2265 2))
2266 return 1;
2267 return 0;
2268 }
2269
// If the current mode being searched is NEWMV, this function will look
// at previously searched MVs and check if they are the same
// as the current MV. If it finds that this MV is repeated, it compares
// the cost to the previous MV and skips the rest of the search if it is
// more expensive. Returns 1 when the caller should skip this ref_mv_idx
// (after storing winner-mode stats and restoring the dst buffers), 0 to
// continue the search. May also update best_mbmi/best_rd/mode_info in
// place when the repeated mv is cheaper under the current ref_mv_idx.
static int skip_repeated_newmv(
    AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
    const int do_tx_search, const PREDICTION_MODE this_mode,
    MB_MODE_INFO *best_mbmi, motion_mode_candidate *motion_mode_cand,
    int64_t *ref_best_rd, RD_STATS *best_rd_stats, RD_STATS *best_rd_stats_y,
    RD_STATS *best_rd_stats_uv, inter_mode_info *mode_info,
    HandleInterModeArgs *args, int drl_cost, const int *refs, int_mv *cur_mv,
    int64_t *best_rd, const BUFFER_SET orig_dst, int ref_mv_idx) {
  // This feature only works for NEWMV when a previous mv has been searched
  if (this_mode != NEWMV || ref_mv_idx == 0) return 0;
  MACROBLOCKD *xd = &x->e_mbd;
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);

  int skip = 0;
  int this_rate_mv = 0;
  // |i| is read after the loop (when skip is set) to copy stats from the
  // matching earlier index, so it is declared outside the loop.
  int i;
  for (i = 0; i < ref_mv_idx; ++i) {
    // Check if the motion search result same as previous results
    if (cur_mv[0].as_int == args->single_newmv[i][refs[0]].as_int &&
        args->single_newmv_valid[i][refs[0]]) {
      // If the compared mode has no valid rd, it is unlikely this
      // mode will be the best mode
      if (mode_info[i].rd == INT64_MAX) {
        skip = 1;
        break;
      }
      // Compare the cost difference including drl cost and mv cost
      if (mode_info[i].mv.as_int != INVALID_MV) {
        const int compare_cost = mode_info[i].rate_mv + mode_info[i].drl_cost;
        const int_mv ref_mv = av1_get_ref_mv(x, 0);
        // Re-cost the earlier mv against the current index's predictor.
        this_rate_mv = av1_mv_bit_cost(
            &mode_info[i].mv.as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
            x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
        const int this_cost = this_rate_mv + drl_cost;

        if (compare_cost <= this_cost) {
          // Skip this mode if it is more expensive as the previous result
          // for this MV
          skip = 1;
          break;
        } else {
          // If the cost is less than current best result, make this
          // the best and update corresponding variables unless the
          // best_mv is the same as ref_mv. In this case we skip and
          // rely on NEAR(EST)MV instead
          if (best_mbmi->ref_mv_idx == i &&
              best_mbmi->mv[0].as_int != ref_mv.as_int) {
            assert(*best_rd != INT64_MAX);
            assert(best_mbmi->mv[0].as_int == mode_info[i].mv.as_int);
            best_mbmi->ref_mv_idx = ref_mv_idx;
            motion_mode_cand->rate_mv = this_rate_mv;
            best_rd_stats->rate += this_cost - compare_cost;
            *best_rd =
                RDCOST(x->rdmult, best_rd_stats->rate, best_rd_stats->dist);
            // We also need to update mode_info here because we are setting
            // (ref_)best_rd here. So we will not be able to search the same
            // mode again with the current configuration.
            mode_info[ref_mv_idx].mv.as_int = best_mbmi->mv[0].as_int;
            mode_info[ref_mv_idx].rate_mv = this_rate_mv;
            mode_info[ref_mv_idx].rd = *best_rd;
            if (*best_rd < *ref_best_rd) *ref_best_rd = *best_rd;
            break;
          }
        }
      }
    }
  }
  if (skip) {
    const THR_MODES mode_enum = get_prediction_mode_idx(
        best_mbmi->mode, best_mbmi->ref_frame[0], best_mbmi->ref_frame[1]);
    // Collect mode stats for multiwinner mode processing
    store_winner_mode_stats(
        &cpi->common, x, best_mbmi, best_rd_stats, best_rd_stats_y,
        best_rd_stats_uv, mode_enum, NULL, bsize, *best_rd,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
    // Inherit modelled/simple RD from the matching earlier index so this
    // index is treated as already searched.
    args->modelled_rd[this_mode][ref_mv_idx][refs[0]] =
        args->modelled_rd[this_mode][i][refs[0]];
    args->simple_rd[this_mode][ref_mv_idx][refs[0]] =
        args->simple_rd[this_mode][i][refs[0]];
    mode_info[ref_mv_idx].rd = mode_info[i].rd;
    mode_info[ref_mv_idx].rate_mv = this_rate_mv;
    mode_info[ref_mv_idx].mv.as_int = mode_info[i].mv.as_int;

    restore_dst_buf(xd, orig_dst, num_planes);
    return 1;
  }
  return 0;
}
2364
2365 /*!\brief High level function to select parameters for compound mode.
2366 *
2367 * \ingroup inter_mode_search
2368 * The main search functionality is done in the call to av1_compound_type_rd().
2369 *
2370 * \param[in] cpi Top-level encoder structure.
2371 * \param[in] x Pointer to struct holding all the data for
2372 * the current macroblock.
2373 * \param[in] args HandleInterModeArgs struct holding
2374 * miscellaneous arguments for inter mode
2375 * search. See the documentation for this
2376 * struct for a description of each member.
2377 * \param[in] ref_best_rd Best RD found so far for this block.
2378 * It is used for early termination of this
2379 * search if the RD exceeds this value.
2380 * \param[in,out] cur_mv Current motion vector.
2381 * \param[in] bsize Current block size.
2382 * \param[in,out] compmode_interinter_cost RD of the selected interinter
 *                                           compound mode.
2384 * \param[in,out] rd_buffers CompoundTypeRdBuffers struct to hold all
2385 * allocated buffers for the compound
2386 * predictors and masks in the compound type
2387 * search.
2388 * \param[in,out] orig_dst A prediction buffer to hold a computed
2389 * prediction. This will eventually hold the
2390 * final prediction, and the tmp_dst info will
2391 * be copied here.
2392 * \param[in] tmp_dst A temporary prediction buffer to hold a
2393 * computed prediction.
2394 * \param[in,out] rate_mv The rate associated with the motion vectors.
2395 * This will be modified if a motion search is
2396 * done in the motion mode search.
2397 * \param[in,out] rd_stats Struct to keep track of the overall RD
2398 * information.
2399 * \param[in,out] skip_rd An array of length 2 where skip_rd[0] is the
2400 * best total RD for a skip mode so far, and
2401 * skip_rd[1] is the best RD for a skip mode so
2402 * far in luma. This is used as a speed feature
2403 * to skip the transform search if the computed
2404 * skip RD for the current mode is not better
2405 * than the best skip_rd so far.
2406 * \param[in,out] skip_build_pred Indicates whether or not to build the inter
2407 * predictor. If this is 0, the inter predictor
2408 * has already been built and thus we can avoid
2409 * repeating computation.
2410 * \return Returns 1 if this mode is worse than one already seen and 0 if it is
2411 * a viable candidate.
2412 */
static int process_compound_inter_mode(
    AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
    int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
    RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
  const AV1_COMMON *cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const int num_planes = av1_num_planes(cm);
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  // Masked (wedge/diffwtd) compound types are only eligible when both the
  // block size and the sequence header permit them.
  const int use_masked_compound = is_any_masked_compound_used(bsize) &&
                                  cm->seq_params.enable_masked_compound;
  // Consider all four inter-inter compound types in the search.
  int compound_search_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) |
                             (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);
  int luma_interp_done = 0;
  set_default_interp_filters(mbmi, cm->features.interp_filter);

  // RD threshold used inside the compound-type search for early termination.
  const int comp_rd_shift = COMP_TYPE_RD_THRESH_SHIFT;
  const int comp_rd_scale = COMP_TYPE_RD_THRESH_SCALE;
  const int64_t rd_thresh = get_rd_thresh_from_best_rd(
      ref_best_rd, (1 << comp_rd_shift), comp_rd_scale);

  // Select the compound type and any parameters related to that type
  // (for example, the mask parameters if it is a masked mode) and compute
  // the RD.
  int64_t best_rd_compound;
  *compmode_interinter_cost = av1_compound_type_rd(
      cpi, x, args, bsize, cur_mv, compound_search_mask, use_masked_compound,
      orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
      ref_best_rd, skip_rd[1], &luma_interp_done, rd_thresh);

  // Give up on this mode if the best compound RD (scaled by a small
  // tolerance) is already worse than the best RD seen so far.
  if (ref_best_rd < INT64_MAX &&
      (best_rd_compound >> comp_rd_shift) * comp_rd_scale > ref_best_rd) {
    restore_dst_buf(xd, *orig_dst, num_planes);
    return 1;
  }

  // Build only the uv predictor for COMPOUND_AVERAGE.
  // Note there is no need to call av1_enc_build_inter_predictor
  // for luma if COMPOUND_AVERAGE is selected because it is the first
  // candidate in av1_compound_type_rd, which means it used the dst_buf
  // rather than the tmp_buf.
  if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && luma_interp_done) {
    if (num_planes > 1) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                    AOM_PLANE_U, num_planes - 1);
    }
    *skip_build_pred = 1;
  }
  return 0;
}
2467
2468 // Speed feature to prune out MVs that are similar to previous MVs if they
2469 // don't achieve the best RD advantage.
prune_ref_mv_idx_search(int ref_mv_idx,int best_ref_mv_idx,int_mv save_mv[MAX_REF_MV_SEARCH-1][2],MB_MODE_INFO * mbmi,int pruning_factor)2470 static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
2471 int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
2472 MB_MODE_INFO *mbmi, int pruning_factor) {
2473 int i;
2474 const int is_comp_pred = has_second_ref(mbmi);
2475 const int thr = (1 + is_comp_pred) << (pruning_factor + 1);
2476
2477 // Skip the evaluation if an MV match is found.
2478 if (ref_mv_idx > 0) {
2479 for (int idx = 0; idx < ref_mv_idx; ++idx) {
2480 if (save_mv[idx][0].as_int == INVALID_MV) continue;
2481
2482 int mv_diff = 0;
2483 for (i = 0; i < 1 + is_comp_pred; ++i) {
2484 mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
2485 abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
2486 }
2487
2488 // If this mode is not the best one, and current MV is similar to
2489 // previous stored MV, terminate this ref_mv_idx evaluation.
2490 if (best_ref_mv_idx == -1 && mv_diff <= thr) return 1;
2491 }
2492 }
2493
2494 if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
2495 for (i = 0; i < is_comp_pred + 1; ++i)
2496 save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
2497 }
2498
2499 return 0;
2500 }
2501
2502 /*!\brief AV1 inter mode RD computation
2503 *
2504 * \ingroup inter_mode_search
2505 * Do the RD search for a given inter mode and compute all information relevant
2506 * to the input mode. It will compute the best MV,
2507 * compound parameters (if the mode is a compound mode) and interpolation filter
2508 * parameters.
2509 *
2510 * \param[in] cpi Top-level encoder structure.
2511 * \param[in] tile_data Pointer to struct holding adaptive
2512 * data/contexts/models for the tile during
2513 * encoding.
2514 * \param[in] x Pointer to structure holding all the data
2515 * for the current macroblock.
2516 * \param[in] bsize Current block size.
2517 * \param[in,out] rd_stats Struct to keep track of the overall RD
2518 * information.
2519 * \param[in,out] rd_stats_y Struct to keep track of the RD information
2520 * for only the Y plane.
2521 * \param[in,out] rd_stats_uv Struct to keep track of the RD information
2522 * for only the UV planes.
2523 * \param[in] args HandleInterModeArgs struct holding
2524 * miscellaneous arguments for inter mode
2525 * search. See the documentation for this
2526 * struct for a description of each member.
2527 * \param[in] ref_best_rd Best RD found so far for this block.
2528 * It is used for early termination of this
2529 * search if the RD exceeds this value.
2530 * \param[in] tmp_buf Temporary buffer used to hold predictors
2531 * built in this search.
2532 * \param[in,out] rd_buffers CompoundTypeRdBuffers struct to hold all
2533 * allocated buffers for the compound
2534 * predictors and masks in the compound type
2535 * search.
2536 * \param[in,out] best_est_rd Estimated RD for motion mode search if
2537 * do_tx_search (see below) is 0.
2538 * \param[in] do_tx_search Parameter to indicate whether or not to do
2539 * a full transform search. This will compute
2540 * an estimated RD for the modes without the
2541 * transform search and later perform the full
2542 * transform search on the best candidates.
2543 * \param[in,out] inter_modes_info InterModesInfo struct to hold inter mode
2544 * information to perform a full transform
2545 * search only on winning candidates searched
2546 * with an estimate for transform coding RD.
2547 * \param[in,out] motion_mode_cand A motion_mode_candidate struct to store
2548 * motion mode information used in a speed
2549 * feature to search motion modes other than
2550 * SIMPLE_TRANSLATION only on winning
2551 * candidates.
2552 * \param[in,out] skip_rd A length 2 array, where skip_rd[0] is the
2553 * best total RD for a skip mode so far, and
2554 * skip_rd[1] is the best RD for a skip mode so
2555 * far in luma. This is used as a speed feature
2556 * to skip the transform search if the computed
2557 * skip RD for the current mode is not better
2558 * than the best skip_rd so far.
2559 * \param[in] inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
2560 * narrow down the search based on data
2561 * collected in the TPL model.
2562 * \param[out] yrd Stores the rdcost corresponding to encoding
2563 * the luma plane.
2564 *
2565 * \return The RD cost for the mode being searched.
2566 */
static int64_t handle_inter_mode(
    AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
    uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
    int64_t *best_est_rd, const int do_tx_search,
    InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
    int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
    int64_t *yrd) {
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;

  // TPL-based pruning is only usable when the speed feature is on and valid
  // TPL stats exist for this GF-group frame.
  const GF_GROUP *const gf_group = &cpi->gf_group;
  const int tpl_idx = gf_group->index;
  TplDepFrame *tpl_frame = &cpi->tpl_data.tpl_frame[tpl_idx];
  const int prune_modes_based_on_tpl =
      cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
      tpl_idx < MAX_TPL_FRAME_IDX && tpl_frame->is_valid;
  int i;
  // Reference frames for this mode
  const int refs[2] = { mbmi->ref_frame[0],
                        (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
  int rate_mv = 0;
  int64_t rd = INT64_MAX;
  // Do first prediction into the destination buffer. Do the next
  // prediction into a temporary buffer. Then keep track of which one
  // of these currently holds the best predictor, and use the other
  // one for future predictions. In the end, copy from tmp_buf to
  // dst if necessary.
  struct macroblockd_plane *p = xd->plane;
  const BUFFER_SET orig_dst = {
    { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
    { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
  };
  // tmp_buf is carved into three per-plane regions of MAX_SB_SQUARE each.
  const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
                                 tmp_buf + 2 * MAX_SB_SQUARE },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };

  int64_t ret_val = INT64_MAX;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  // Running best-so-far state across the ref_mv_idx loop below.
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  int64_t best_rd = INT64_MAX;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  int64_t best_yrd = INT64_MAX;
  MB_MODE_INFO best_mbmi = *mbmi;
  int best_xskip_txfm = 0;
  int64_t newmv_ret_val = INT64_MAX;
  inter_mode_info mode_info[MAX_REF_MV_SEARCH];

  // Do not prune the mode based on inter cost from tpl if the current ref
  // frame is the winner ref in neighbouring blocks.
  int ref_match_found_in_above_nb = 0;
  int ref_match_found_in_left_nb = 0;
  if (prune_modes_based_on_tpl) {
    ref_match_found_in_above_nb =
        find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
    ref_match_found_in_left_nb =
        find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
  }

  // First, perform a simple translation search for each of the indices. If
  // an index performs well, it will be fully searched in the main loop
  // of this function.
  const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
  // Save MV results from first 2 ref_mv_idx.
  int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
  int best_ref_mv_idx = -1;
  // idx_mask has a bit set for each ref_mv_idx worth a full search.
  const int idx_mask = ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd,
                                            mode_info, bsize, ref_set);
  const int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
  const ModeCosts *mode_costs = &x->mode_costs;
  const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
  // Rate common to every ref_mv_idx: reference signalling + mode cost.
  const int base_rate =
      args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;

  for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
    save_mv[i][0].as_int = INVALID_MV;
    save_mv[i][1].as_int = INVALID_MV;
  }

  // Main loop of this function. This will iterate over all of the ref mvs
  // in the dynamic reference list and do the following:
  // 1.) Get the current MV. Create newmv MV if necessary
  // 2.) Search compound type and parameters if applicable
  // 3.) Do interpolation filter search
  // 4.) Build the inter predictor
  // 5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
  //     WARPED_CAUSAL)
  // 6.) Update stats if best so far
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
    mode_info[ref_mv_idx].mv.as_int = INVALID_MV;
    mode_info[ref_mv_idx].rd = INT64_MAX;

    if (!mask_check_bit(idx_mask, ref_mv_idx)) {
      // MV did not perform well in simple translation search. Skip it.
      continue;
    }
    if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
        !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
      // Skip mode if TPL model indicates it will not be beneficial.
      if (prune_modes_based_on_tpl_stats(
              inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
              cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
        continue;
    }
    av1_init_rd_stats(rd_stats);

    // Initialize compound mode data
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
    if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;

    mbmi->num_proj_ref = 0;
    mbmi->motion_mode = SIMPLE_TRANSLATION;
    mbmi->ref_mv_idx = ref_mv_idx;

    // Compute cost for signalling this DRL index
    rd_stats->rate = base_rate;
    const int drl_cost = get_drl_cost(
        mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
    rd_stats->rate += drl_cost;
    mode_info[ref_mv_idx].drl_cost = drl_cost;

    int rs = 0;
    int compmode_interinter_cost = 0;

    int_mv cur_mv[2];

    // TODO(Cherma): Extend this speed feature to support compound mode
    int skip_repeated_ref_mv =
        is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
    // Generate the current mv according to the prediction mode
    if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
      continue;
    }

    // The above call to build_cur_mv does not handle NEWMV modes. Build
    // the mv here if we have NEWMV for any predictors.
    if (have_newmv_in_inter_mode(this_mode)) {
#if CONFIG_COLLECT_COMPONENT_TIMING
      start_timing(cpi, handle_newmv_time);
#endif
      newmv_ret_val =
          handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
#if CONFIG_COLLECT_COMPONENT_TIMING
      end_timing(cpi, handle_newmv_time);
#endif

      // Motion search failed for this index; move on to the next one.
      if (newmv_ret_val != 0) continue;

      rd_stats->rate += rate_mv;

      // skip NEWMV mode in drl if the motion search result is the same
      // as a previous result
      if (cpi->sf.inter_sf.skip_repeated_newmv &&
          skip_repeated_newmv(cpi, x, bsize, do_tx_search, this_mode,
                              &best_mbmi, motion_mode_cand, &ref_best_rd,
                              &best_rd_stats, &best_rd_stats_y,
                              &best_rd_stats_uv, mode_info, args, drl_cost,
                              refs, cur_mv, &best_rd, orig_dst, ref_mv_idx))
        continue;
    }
    // Copy the motion vector for this mode into mbmi struct
    for (i = 0; i < is_comp_pred + 1; ++i) {
      mbmi->mv[i].as_int = cur_mv[i].as_int;
    }

    // Prune this ref_mv_idx if the rate cost alone already exceeds the best
    // RD; NEARESTMV/NEAREST_NEARESTMV are exempt and always evaluated.
    if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
        mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
      continue;
    }

    // Skip the rest of the search if prune_ref_mv_idx_search speed feature
    // is enabled, and the current MV is similar to a previous one.
    if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
        prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
                                cpi->sf.inter_sf.prune_ref_mv_idx_search))
      continue;

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, compound_type_rd_time);
#endif
    int skip_build_pred = 0;
    const int mi_row = xd->mi_row;
    const int mi_col = xd->mi_col;

    // Handle a compound predictor, continue if it is determined this
    // cannot be the best compound mode
    if (is_comp_pred) {
      const int not_best_mode = process_compound_inter_mode(
          cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
          rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
          &skip_build_pred);
      if (not_best_mode) continue;
    }

#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, compound_type_rd_time);
#endif

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, interpolation_filter_search_time);
#endif
    // Determine the interpolation filter for this mode
    ret_val = av1_interpolation_filter_search(
        x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
        &skip_build_pred, args, ref_best_rd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, interpolation_filter_search_time);
#endif
    // Record the single-reference modelled RD so compound modes can be
    // pruned against it later.
    if (args->modelled_rd != NULL && !is_comp_pred) {
      args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
    }
    if (ret_val != 0) {
      // Filter search failed for this candidate.
      restore_dst_buf(xd, orig_dst, num_planes);
      continue;
    } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
               ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
      // Model-based breakout: estimated rd is already far above the best.
      restore_dst_buf(xd, orig_dst, num_planes);
      continue;
    }

    // Compute modelled RD if enabled
    if (args->modelled_rd != NULL) {
      if (is_comp_pred) {
        // Prune the compound mode if its estimate is much worse than the
        // better of its two constituent single-reference modes.
        const int mode0 = compound_ref0_mode(this_mode);
        const int mode1 = compound_ref1_mode(this_mode);
        const int64_t mrd =
            AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
                   args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
        if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
          restore_dst_buf(xd, orig_dst, num_planes);
          continue;
        }
      }
    }
    // Account for the rate of signalling the selected compound type.
    rd_stats->rate += compmode_interinter_cost;
    if (skip_build_pred != 1) {
      // Build this inter predictor if it has not been previously built
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0,
                                    av1_num_planes(cm) - 1);
    }

#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, motion_mode_rd_time);
#endif
    // Rate accumulated so far, excluding coefficient costs; saved for the
    // motion-mode winner candidate below.
    int rate2_nocoeff = rd_stats->rate;
    // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
    // OBMC_CAUSAL or WARPED_CAUSAL
    int64_t this_yrd;
    ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
                             rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
                             &orig_dst, best_est_rd, do_tx_search,
                             inter_modes_info, 0, &this_yrd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, motion_mode_rd_time);
#endif
    assert(
        IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));

    if (ret_val != INT64_MAX) {
      int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      if (tmp_rd < mode_info[ref_mv_idx].rd) {
        // Only update mode_info if the new result is actually better.
        mode_info[ref_mv_idx].mv.as_int = mbmi->mv[0].as_int;
        mode_info[ref_mv_idx].rate_mv = rate_mv;
        mode_info[ref_mv_idx].rd = tmp_rd;
      }
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
                              rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
                              cpi->sf.winner_mode_sf.multi_winner_mode_type,
                              do_tx_search);
      if (tmp_rd < best_rd) {
        best_yrd = this_yrd;
        // Update the best rd stats if we found the best mode so far
        best_rd_stats = *rd_stats;
        best_rd_stats_y = *rd_stats_y;
        best_rd_stats_uv = *rd_stats_uv;
        best_rd = tmp_rd;
        best_mbmi = *mbmi;
        best_xskip_txfm = txfm_info->skip_txfm;
        memcpy(best_blk_skip, txfm_info->blk_skip,
               sizeof(best_blk_skip[0]) * xd->height * xd->width);
        av1_copy_array(best_tx_type_map, xd->tx_type_map,
                       xd->height * xd->width);
        motion_mode_cand->rate_mv = rate_mv;
        motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
      }

      // Tighten the pruning threshold for the remaining ref_mv_idx
      // candidates and remember which index achieved it.
      if (tmp_rd < ref_best_rd) {
        ref_best_rd = tmp_rd;
        best_ref_mv_idx = ref_mv_idx;
      }
    }
    restore_dst_buf(xd, orig_dst, num_planes);
  }

  // No candidate survived the search for this mode.
  if (best_rd == INT64_MAX) return INT64_MAX;

  // re-instate status of the best choice
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  *rd_stats_uv = best_rd_stats_uv;
  *yrd = best_yrd;
  *mbmi = best_mbmi;
  txfm_info->skip_txfm = best_xskip_txfm;
  assert(IMPLIES(mbmi->comp_group_idx == 1,
                 mbmi->interinter_comp.type != COMPOUND_AVERAGE));
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);

  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);

  return rd_stats->rdcost;
}
2896
2897 /*!\brief Search for the best intrabc predictor
2898 *
2899 * \ingroup intra_mode_search
2900 * \callergraph
2901 * This function performs a motion search to find the best intrabc predictor.
2902 *
2903 * \returns Returns the best overall rdcost (including the non-intrabc modes
2904 * search before this function).
2905 */
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
                                       int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  // IntraBC is only available on allowed (key) frames and when enabled.
  if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc)
    return INT64_MAX;
  const int num_planes = av1_num_planes(cm);

  MACROBLOCKD *const xd = &x->e_mbd;
  const TileInfo *tile = &xd->tile;
  MB_MODE_INFO *mbmi = xd->mi[0];
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  const int w = block_size_wide[bsize];
  const int h = block_size_high[bsize];
  const int sb_row = mi_row >> cm->seq_params.mib_size_log2;
  const int sb_col = mi_col >> cm->seq_params.mib_size_log2;

  // Derive a reference DV from the MV stack of the current (intra) frame.
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                   mbmi_ext->mode_context);
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
  int_mv nearestmv, nearmv;
  av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
                                   0);

  if (nearestmv.as_int == INVALID_MV) {
    nearestmv.as_int = 0;
  }
  if (nearmv.as_int == INVALID_MV) {
    nearmv.as_int = 0;
  }

  // Prefer the nearest MV; fall back to the near MV, then to a default DV.
  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
  if (dv_ref.as_int == 0) {
    av1_find_ref_dv(&dv_ref, tile, cm->seq_params.mib_size, mi_row);
  }
  // Ref DV should not have sub-pel.
  assert((dv_ref.as_mv.col & 7) == 0);
  assert((dv_ref.as_mv.row & 7) == 0);
  mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;

  // IntraBC predicts from the already-coded part of the current frame, so
  // point the prediction sources at the current buffer.
  struct buf_2d yv12_mb[MAX_MB_PLANE];
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
  for (int i = 0; i < num_planes; ++i) {
    xd->plane[i].pre[0] = yv12_mb[i];
  }

  enum IntrabcMotionDirection {
    IBC_MOTION_ABOVE,
    IBC_MOTION_LEFT,
    IBC_MOTION_DIRECTIONS
  };

  MB_MODE_INFO best_mbmi = *mbmi;
  RD_STATS best_rdstats = *rd_stats;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);

  FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
  const search_site_config *lookahead_search_sites =
      cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
  av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
                                     &dv_ref.as_mv, lookahead_search_sites,
                                     /*fine_search_interval=*/0);
  fullms_params.is_intra_mode = 1;

  // Search the area above the current superblock, then the area to its left.
  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
       dir < IBC_MOTION_DIRECTIONS; ++dir) {
    switch (dir) {
      case IBC_MOTION_ABOVE:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (tile->mi_col_end - mi_col) * MI_SIZE - w;
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        fullms_params.mv_limits.row_max =
            (sb_row * cm->seq_params.mib_size - mi_row) * MI_SIZE - h;
        break;
      case IBC_MOTION_LEFT:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (sb_col * cm->seq_params.mib_size - mi_col) * MI_SIZE - w;
        // TODO(aconverse@google.com): Minimize the overlap between above and
        // left areas.
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        int bottom_coded_mi_edge =
            AOMMIN((sb_row + 1) * cm->seq_params.mib_size, tile->mi_row_end);
        fullms_params.mv_limits.row_max =
            (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
        break;
      default: assert(0);
    }
    // av1_set_mv_search_range() must only ever shrink the search range, never
    // grow it beyond the directional limits computed above. (The previous
    // asserts here compared each field against itself, which verified
    // nothing.)
    const FullMvLimits tmp_mv_limits = fullms_params.mv_limits;
    av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);
    assert(fullms_params.mv_limits.col_min >= tmp_mv_limits.col_min);
    assert(fullms_params.mv_limits.col_max <= tmp_mv_limits.col_max);
    assert(fullms_params.mv_limits.row_min >= tmp_mv_limits.row_min);
    assert(fullms_params.mv_limits.row_max <= tmp_mv_limits.row_max);
    (void)tmp_mv_limits;  // Unused when asserts are compiled out.

    // Empty search range for this direction; nothing to do.
    if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
        fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
      continue;
    }

    const int step_param = cpi->mv_search_params.mv_step_param;
    const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
    IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
    int_mv best_mv, best_hash_mv;

    // Regular full-pel search, then a hash-based search; keep the better DV.
    int bestsme = av1_full_pixel_search(start_mv, &fullms_params, step_param,
                                        NULL, &best_mv.as_fullmv, NULL);
    const int hashsme = av1_intrabc_hash_search(
        cpi, xd, &fullms_params, intrabc_hash_info, &best_hash_mv.as_fullmv);
    if (hashsme < bestsme) {
      best_mv = best_hash_mv;
      bestsme = hashsme;
    }

    if (bestsme == INT_MAX) continue;
    const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
    // The hash search may return a DV outside this direction's limits; the
    // bitstream additionally constrains valid DVs, so check both.
    if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
                                get_fullmv_from_mv(&dv)))
      continue;
    if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
                         cm->seq_params.mib_size_log2))
      continue;

    // DV should not have sub-pel.
    assert((dv.col & 7) == 0);
    assert((dv.row & 7) == 0);
    // Set up mbmi for an intraBC block: DC_PRED with the found DV and the
    // mandated BILINEAR filter.
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
    mbmi->use_intrabc = 1;
    mbmi->mode = DC_PRED;
    mbmi->uv_mode = UV_DC_PRED;
    mbmi->motion_mode = SIMPLE_TRANSLATION;
    mbmi->mv[0].as_mv = dv;
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
    mbmi->skip_txfm = 0;
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                  av1_num_planes(cm) - 1);

    const IntraBCMVCosts *const dv_costs = &cpi->dv_costs;
    int *dvcost[2] = { (int *)&dv_costs->mv_component[0][MV_MAX],
                       (int *)&dv_costs->mv_component[1][MV_MAX] };
    // TODO(aconverse@google.com): The full motion field defining discount
    // in MV_COST_WEIGHT is too large. Explore other values.
    const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
                                        dvcost, MV_COST_WEIGHT_SUB);
    const int rate_mode = x->mode_costs.intrabc_cost[1];
    RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
                         &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
      continue;
    rd_stats_yuv.rdcost =
        RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
    if (rd_stats_yuv.rdcost < best_rd) {
      best_rd = rd_stats_yuv.rdcost;
      best_mbmi = *mbmi;
      best_rdstats = rd_stats_yuv;
      memcpy(best_blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
    }
  }
  // Restore the overall best result (which may be the non-intrabc input).
  *mbmi = best_mbmi;
  *rd_stats = best_rdstats;
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
#if CONFIG_RD_DEBUG
  mbmi->rd_stats = *rd_stats;
#endif
  return best_rd;
}
3093
// TODO(chiyotsai@google.com): We are using the raw struct tags (struct
// AV1_COMP, struct macroblock, struct RD_STATS) instead of their typedefs here
// because Doxygen doesn't know about the typedefs yet. So using the typedefs
// will prevent doxygen from finding this function and generating the
// callgraph. Once documents for AV1_COMP and MACROBLOCK are added to doxygen,
// we can revert back to using the typedefs.
/*!\brief Picks the best intra (and, when enabled, intra-block-copy) mode for
 * the current block via RD search.
 *
 * On success, \c rd_cost holds the winner's rate/distortion/rdcost, the
 * winning mode info is left in \c xd->mi[0], and the coding context \c ctx is
 * updated (mic, mbmi_ext_best, tx_type_map) for later reuse. If nothing beats
 * \c best_rd, \c rd_cost->rate is left at INT_MAX to signal failure.
 */
void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_planes = av1_num_planes(cm);
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  int y_skip_txfm = 0, uv_skip_txfm = 0;
  int64_t dist_y = 0, dist_uv = 0;

  // Start from a plain intra configuration: no intrabc, zero mv, no skip.
  ctx->rd_stats.skip_txfm = 0;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->use_intrabc = 0;
  mbmi->mv[0].as_int = 0;
  mbmi->skip_mode = 0;

  // Search luma intra modes first; 'ctx' is passed through so the winning
  // luma tx data can be recorded for reuse below.
  const int64_t intra_yrd =
      av1_rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
                                 &y_skip_txfm, bsize, best_rd, ctx);

  // Initialize default mode evaluation params
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);

  if (intra_yrd < best_rd) {
    // Search intra modes for uv planes if needed
    if (num_planes > 1) {
      // Set up the tx variables for reproducing the y predictions in case we
      // need it for chroma-from-luma.
      if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
        memcpy(txfm_info->blk_skip, ctx->blk_skip,
               sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
        av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
      }
      const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
      av1_rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
                                  &dist_uv, &uv_skip_txfm, bsize,
                                  max_uv_tx_size);
    }

    // Intra block is always coded as non-skip
    rd_cost->rate =
        rate_y + rate_uv +
        x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
    rd_cost->dist = dist_y + dist_uv;
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
    rd_cost->skip_txfm = 0;
  } else {
    // Luma search did not beat the reference cost: flag the result invalid.
    rd_cost->rate = INT_MAX;
  }

  // Tighten the pruning threshold before trying intra block copy.
  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
    best_rd = rd_cost->rdcost;
  // Intra block copy search updates *mbmi and rd_cost itself when it wins;
  // in that case the context must pick up the intrabc tx bookkeeping.
  if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
    ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
    memcpy(ctx->blk_skip, txfm_info->blk_skip,
           sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
    assert(rd_cost->rate != INT_MAX);
  }
  if (rd_cost->rate == INT_MAX) return;

  // Record the winning mode info in the coding context for reuse.
  ctx->mic = *xd->mi[0];
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
3167
3168 static AOM_INLINE void calc_target_weighted_pred(
3169 const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
3170 const uint8_t *above, int above_stride, const uint8_t *left,
3171 int left_stride);
3172
rd_pick_skip_mode(RD_STATS * rd_cost,InterModeSearchState * search_state,const AV1_COMP * const cpi,MACROBLOCK * const x,BLOCK_SIZE bsize,struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE])3173 static AOM_INLINE void rd_pick_skip_mode(
3174 RD_STATS *rd_cost, InterModeSearchState *search_state,
3175 const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
3176 struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
3177 const AV1_COMMON *const cm = &cpi->common;
3178 const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
3179 const int num_planes = av1_num_planes(cm);
3180 MACROBLOCKD *const xd = &x->e_mbd;
3181 MB_MODE_INFO *const mbmi = xd->mi[0];
3182 const TxfmSearchParams *txfm_params = &x->txfm_search_params;
3183
3184 x->compound_idx = 1; // COMPOUND_AVERAGE
3185 RD_STATS skip_mode_rd_stats;
3186 av1_invalid_rd_stats(&skip_mode_rd_stats);
3187
3188 if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
3189 skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
3190 return;
3191 }
3192
3193 const MV_REFERENCE_FRAME ref_frame =
3194 LAST_FRAME + skip_mode_info->ref_frame_idx_0;
3195 const MV_REFERENCE_FRAME second_ref_frame =
3196 LAST_FRAME + skip_mode_info->ref_frame_idx_1;
3197 const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
3198 const THR_MODES mode_index =
3199 get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
3200
3201 if (mode_index == THR_INVALID) {
3202 return;
3203 }
3204
3205 if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
3206 cpi->sf.inter_sf.disable_onesided_comp) &&
3207 cpi->all_one_sided_refs) {
3208 return;
3209 }
3210
3211 mbmi->mode = this_mode;
3212 mbmi->uv_mode = UV_DC_PRED;
3213 mbmi->ref_frame[0] = ref_frame;
3214 mbmi->ref_frame[1] = second_ref_frame;
3215 const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3216 if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
3217 MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
3218 if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
3219 mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
3220 return;
3221 }
3222 av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
3223 xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3224 mbmi_ext->mode_context);
3225 // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3226 // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3227 av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
3228 }
3229
3230 assert(this_mode == NEAREST_NEARESTMV);
3231 if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
3232 return;
3233 }
3234
3235 mbmi->filter_intra_mode_info.use_filter_intra = 0;
3236 mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
3237 mbmi->comp_group_idx = 0;
3238 mbmi->compound_idx = x->compound_idx;
3239 mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3240 mbmi->motion_mode = SIMPLE_TRANSLATION;
3241 mbmi->ref_mv_idx = 0;
3242 mbmi->skip_mode = mbmi->skip_txfm = 1;
3243
3244 set_default_interp_filters(mbmi, cm->features.interp_filter);
3245
3246 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3247 for (int i = 0; i < num_planes; i++) {
3248 xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3249 xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3250 }
3251
3252 BUFFER_SET orig_dst;
3253 for (int i = 0; i < num_planes; i++) {
3254 orig_dst.plane[i] = xd->plane[i].dst.buf;
3255 orig_dst.stride[i] = xd->plane[i].dst.stride;
3256 }
3257
3258 // Obtain the rdcost for skip_mode.
3259 skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst);
3260
3261 // Compare the use of skip_mode with the best intra/inter mode obtained.
3262 const int skip_mode_ctx = av1_get_skip_mode_context(xd);
3263 int64_t best_intra_inter_mode_cost = INT64_MAX;
3264 if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
3265 const ModeCosts *mode_costs = &x->mode_costs;
3266 best_intra_inter_mode_cost = RDCOST(
3267 x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
3268 rd_cost->dist);
3269 // Account for non-skip mode rate in total rd stats
3270 rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
3271 av1_rd_cost_update(x->rdmult, rd_cost);
3272 }
3273
3274 if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
3275 (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
3276 assert(mode_index != THR_INVALID);
3277 search_state->best_mbmode.skip_mode = 1;
3278 search_state->best_mbmode = *mbmi;
3279
3280 search_state->best_mbmode.skip_mode = search_state->best_mbmode.skip_txfm =
3281 1;
3282 search_state->best_mbmode.mode = NEAREST_NEARESTMV;
3283 search_state->best_mbmode.ref_frame[0] = mbmi->ref_frame[0];
3284 search_state->best_mbmode.ref_frame[1] = mbmi->ref_frame[1];
3285 search_state->best_mbmode.mv[0].as_int = mbmi->mv[0].as_int;
3286 search_state->best_mbmode.mv[1].as_int = mbmi->mv[1].as_int;
3287 search_state->best_mbmode.ref_mv_idx = 0;
3288
3289 // Set up tx_size related variables for skip-specific loop filtering.
3290 search_state->best_mbmode.tx_size =
3291 block_signals_txsize(bsize)
3292 ? tx_size_from_tx_mode(bsize, txfm_params->tx_mode_search_type)
3293 : max_txsize_rect_lookup[bsize];
3294 memset(search_state->best_mbmode.inter_tx_size,
3295 search_state->best_mbmode.tx_size,
3296 sizeof(search_state->best_mbmode.inter_tx_size));
3297 set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
3298 search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
3299 xd);
3300
3301 // Set up color-related variables for skip mode.
3302 search_state->best_mbmode.uv_mode = UV_DC_PRED;
3303 search_state->best_mbmode.palette_mode_info.palette_size[0] = 0;
3304 search_state->best_mbmode.palette_mode_info.palette_size[1] = 0;
3305
3306 search_state->best_mbmode.comp_group_idx = 0;
3307 search_state->best_mbmode.compound_idx = x->compound_idx;
3308 search_state->best_mbmode.interinter_comp.type = COMPOUND_AVERAGE;
3309 search_state->best_mbmode.motion_mode = SIMPLE_TRANSLATION;
3310
3311 search_state->best_mbmode.interintra_mode =
3312 (INTERINTRA_MODE)(II_DC_PRED - 1);
3313 search_state->best_mbmode.filter_intra_mode_info.use_filter_intra = 0;
3314
3315 set_default_interp_filters(&search_state->best_mbmode,
3316 cm->features.interp_filter);
3317
3318 search_state->best_mode_index = mode_index;
3319
3320 // Update rd_cost
3321 rd_cost->rate = skip_mode_rd_stats.rate;
3322 rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
3323 rd_cost->rdcost = skip_mode_rd_stats.rdcost;
3324
3325 search_state->best_rd = rd_cost->rdcost;
3326 search_state->best_skip2 = 1;
3327 search_state->best_mode_skippable = 1;
3328
3329 x->txfm_search_info.skip_txfm = 1;
3330 }
3331 }
3332
3333 // Get winner mode stats of given mode index
get_winner_mode_stats(MACROBLOCK * x,MB_MODE_INFO * best_mbmode,RD_STATS * best_rd_cost,int best_rate_y,int best_rate_uv,THR_MODES * best_mode_index,RD_STATS ** winner_rd_cost,int * winner_rate_y,int * winner_rate_uv,THR_MODES * winner_mode_index,MULTI_WINNER_MODE_TYPE multi_winner_mode_type,int mode_idx)3334 static AOM_INLINE MB_MODE_INFO *get_winner_mode_stats(
3335 MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
3336 int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
3337 RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
3338 THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
3339 int mode_idx) {
3340 MB_MODE_INFO *winner_mbmi;
3341 if (multi_winner_mode_type) {
3342 assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
3343 WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
3344 winner_mbmi = &winner_mode_stat->mbmi;
3345
3346 *winner_rd_cost = &winner_mode_stat->rd_cost;
3347 *winner_rate_y = winner_mode_stat->rate_y;
3348 *winner_rate_uv = winner_mode_stat->rate_uv;
3349 *winner_mode_index = winner_mode_stat->mode_index;
3350 } else {
3351 winner_mbmi = best_mbmode;
3352 *winner_rd_cost = best_rd_cost;
3353 *winner_rate_y = best_rate_y;
3354 *winner_rate_uv = best_rate_uv;
3355 *winner_mode_index = *best_mode_index;
3356 }
3357 return winner_mbmi;
3358 }
3359
// speed feature: fast intra/inter transform type search
// Used for speed >= 2
// When this speed feature is on, in rd mode search, only DCT is used.
// After the mode is determined, this function is called, to select
// transform types and get accurate rdcost.
//
// For each stored winner mode (or just the single best mode when
// multi-winner is off) this redoes the transform search with winner-mode
// evaluation parameters; if a candidate's refined rd cost beats the current
// best, 'best_mbmode', 'best_mode_index', 'rd_cost', 'best_skip2' and the
// coding context 'ctx' are updated in place.
static AOM_INLINE void refine_winner_mode_tx(
    const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
    PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
    MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int64_t best_rd;
  const int num_planes = av1_num_planes(cm);

  if (!is_winner_mode_processing_enabled(cpi, best_mbmode, best_mbmode->mode))
    return;

  // Set params for winner mode evaluation
  set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);

  // No best mode identified so far
  if (*best_mode_index == THR_INVALID) return;

  best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
  for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
    RD_STATS *winner_rd_stats = NULL;
    int winner_rate_y = 0, winner_rate_uv = 0;
    THR_MODES winner_mode_index = 0;

    // TODO(any): Combine best mode and multi-winner mode processing paths
    // Get winner mode stats for current mode index
    MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
        x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
        &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);

    // Lossless blocks are excluded: their transform choice is fixed.
    if (xd->lossless[winner_mbmi->segment_id] == 0 &&
        winner_mode_index != THR_INVALID &&
        is_winner_mode_processing_enabled(cpi, winner_mbmi,
                                          winner_mbmi->mode)) {
      RD_STATS rd_stats = *winner_rd_stats;
      int skip_blk = 0;
      RD_STATS rd_stats_y, rd_stats_uv;
      const int skip_ctx = av1_get_skip_txfm_context(xd);

      // Install the candidate as the current mode so the tx search below
      // operates on it.
      *mbmi = *winner_mbmi;

      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

      // Select prediction reference frames.
      for (int i = 0; i < num_planes; i++) {
        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
        if (has_second_ref(mbmi))
          xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
      }

      if (is_inter_mode(mbmi->mode)) {
        // Rebuild the inter prediction before redoing the luma tx search.
        const int mi_row = xd->mi_row;
        const int mi_col = xd->mi_col;
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                      av1_num_planes(cm) - 1);
        if (mbmi->motion_mode == OBMC_CAUSAL)
          av1_build_obmc_inter_predictors_sb(cm, xd);

        av1_subtract_plane(x, bsize, 0);
        if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
            !xd->lossless[mbmi->segment_id]) {
          av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                              INT64_MAX);
          assert(rd_stats_y.rate != INT_MAX);
        } else {
          av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                            INT64_MAX);
          memset(mbmi->inter_tx_size, mbmi->tx_size,
                 sizeof(mbmi->inter_tx_size));
          for (int i = 0; i < xd->height * xd->width; ++i)
            set_blk_skip(txfm_info->blk_skip, 0, i, rd_stats_y.skip_txfm);
        }
      } else {
        av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                          INT64_MAX);
      }

      if (num_planes > 1) {
        av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
      } else {
        av1_init_rd_stats(&rd_stats_uv);
      }

      // For inter modes, check whether coding the block as all-skip
      // (residual dropped, distortion = sse) is cheaper than coding the
      // refined coefficients.
      const ModeCosts *mode_costs = &x->mode_costs;
      if (is_inter_mode(mbmi->mode) &&
          RDCOST(x->rdmult,
                 mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
                     rd_stats_uv.rate,
                 (rd_stats_y.dist + rd_stats_uv.dist)) >
              RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
                     (rd_stats_y.sse + rd_stats_uv.sse))) {
        skip_blk = 1;
        rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
        rd_stats_uv.rate = 0;
        rd_stats_y.dist = rd_stats_y.sse;
        rd_stats_uv.dist = rd_stats_uv.sse;
      } else {
        skip_blk = 0;
        rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
      }
      // Total rate = stored mode rate with the old y/uv rates swapped out
      // for the refined ones.
      int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
                      winner_rate_y - winner_rate_uv;
      int64_t this_rd =
          RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
      if (best_rd > this_rd) {
        *best_mbmode = *mbmi;
        *best_mode_index = winner_mode_index;
        av1_copy_array(ctx->blk_skip, txfm_info->blk_skip, ctx->num_4x4_blk);
        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
        rd_cost->rate = this_rate;
        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
        rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
        rd_cost->rdcost = this_rd;
        best_rd = this_rd;
        *best_skip2 = skip_blk;
      }
    }
  }
}
3489
/*!\cond */
// Collection of masks consumed by the inter mode search to decide which
// prediction modes and reference-frame combinations to skip entirely.
typedef struct {
  // Mask for each reference frame, specifying which prediction modes to NOT try
  // during search.
  uint32_t pred_modes[REF_FRAMES];
  // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
  // reference frames (i, j).
  // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
  // (NONE_FRAME).
  bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
} mode_skip_mask_t;
/*!\endcond */
3502
3503 // Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
disable_reference(MV_REFERENCE_FRAME ref,bool ref_combo[REF_FRAMES][REF_FRAMES+1])3504 static AOM_INLINE void disable_reference(
3505 MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3506 for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3507 ref_combo[ref][ref2 + 1] = true;
3508 }
3509 }
3510
3511 // Update 'ref_combo' mask to disable all inter references except ALTREF.
disable_inter_references_except_altref(bool ref_combo[REF_FRAMES][REF_FRAMES+1])3512 static AOM_INLINE void disable_inter_references_except_altref(
3513 bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3514 disable_reference(LAST_FRAME, ref_combo);
3515 disable_reference(LAST2_FRAME, ref_combo);
3516 disable_reference(LAST3_FRAME, ref_combo);
3517 disable_reference(GOLDEN_FRAME, ref_combo);
3518 disable_reference(BWDREF_FRAME, ref_combo);
3519 disable_reference(ALTREF2_FRAME, ref_combo);
3520 }
3521
// Reference-frame pairs (second entry NONE_FRAME for single-reference modes)
// that stay enabled when the reduced reference set is selected.
static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
  { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
  { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
  { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
  { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
  { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
  { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
  { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
  { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
};

// Smaller candidate set used by the real-time encoding path: single
// references (plus intra) only, no compound pairs.
static const MV_REFERENCE_FRAME real_time_ref_combos[][2] = {
  { LAST_FRAME, NONE_FRAME },
  { ALTREF_FRAME, NONE_FRAME },
  { GOLDEN_FRAME, NONE_FRAME },
  { INTRA_FRAME, NONE_FRAME }
};

// Selects which pool of reference combinations the mode search draws from.
typedef enum { REF_SET_FULL, REF_SET_REDUCED, REF_SET_REALTIME } REF_SET;
3541
default_skip_mask(mode_skip_mask_t * mask,REF_SET ref_set)3542 static AOM_INLINE void default_skip_mask(mode_skip_mask_t *mask,
3543 REF_SET ref_set) {
3544 if (ref_set == REF_SET_FULL) {
3545 // Everything available by default.
3546 memset(mask, 0, sizeof(*mask));
3547 } else {
3548 // All modes available by default.
3549 memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
3550 // All references disabled first.
3551 for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
3552 for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3553 mask->ref_combo[ref1][ref2 + 1] = true;
3554 }
3555 }
3556 const MV_REFERENCE_FRAME(*ref_set_combos)[2];
3557 int num_ref_combos;
3558
3559 // Then enable reduced set of references explicitly.
3560 switch (ref_set) {
3561 case REF_SET_REDUCED:
3562 ref_set_combos = reduced_ref_combos;
3563 num_ref_combos =
3564 (int)sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]);
3565 break;
3566 case REF_SET_REALTIME:
3567 ref_set_combos = real_time_ref_combos;
3568 num_ref_combos =
3569 (int)sizeof(real_time_ref_combos) / sizeof(real_time_ref_combos[0]);
3570 break;
3571 default: assert(0); num_ref_combos = 0;
3572 }
3573
3574 for (int i = 0; i < num_ref_combos; ++i) {
3575 const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
3576 mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
3577 }
3578 }
3579 }
3580
// Builds the per-block mode skip mask for the inter mode search: starts from
// the default mask for the active reference set, then disables references
// and prediction modes based on unavailable frame buffers, segment-level
// reference restrictions, poor motion-search SAD, and the alt-ref related
// speed features.
static AOM_INLINE void init_mode_skip_mask(mode_skip_mask_t *mask,
                                           const AV1_COMP *cpi, MACROBLOCK *x,
                                           BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const SPEED_FEATURES *const sf = &cpi->sf;
  REF_SET ref_set = REF_SET_FULL;

  // Choose the reference pool: real-time speed features take precedence over
  // the reduced-reference-set encoder option.
  if (sf->rt_sf.use_real_time_ref_set)
    ref_set = REF_SET_REALTIME;
  else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
    ref_set = REF_SET_REDUCED;

  default_skip_mask(mask, ref_set);

  int min_pred_mv_sad = INT_MAX;
  MV_REFERENCE_FRAME ref_frame;
  if (ref_set == REF_SET_REALTIME) {
    // For real-time encoding, we only look at a subset of ref frames. So the
    // threshold for pruning should be computed from this subset as well.
    const int num_rt_refs =
        sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
    for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
      if (ref != INTRA_FRAME) {
        min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
      }
    }
  } else {
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
      min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
  }

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
      // Skip checking missing reference in both single and compound reference
      // modes.
      disable_reference(ref_frame, mask->ref_combo);
    } else {
      // Skip fixed mv modes for poor references
      if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
        mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
      }
    }
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
      // Reference not used for the segment.
      disable_reference(ref_frame, mask->ref_combo);
    }
  }
  // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
  // is disabled for this segment. This is to prevent the possibility that we
  // end up unable to pick any mode.
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
    // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
    // unless ARNR filtering is enabled in which case we want
    // an unfiltered alternative. We allow near/nearest as well
    // because they may result in zero-zero MVs but be cheaper.
    if (cpi->rc.is_src_frame_alt_ref &&
        (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
      disable_inter_references_except_altref(mask->ref_combo);

      mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
      const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
      int_mv near_mv, nearest_mv, global_mv;
      get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
                  &x->mbmi_ext);
      get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
      get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);

      // Re-disable NEAR/NEAREST unless they coincide with the global mv.
      if (near_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
      if (nearest_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
    }
  }

  if (cpi->rc.is_src_frame_alt_ref) {
    if (sf->inter_sf.alt_ref_search_fp &&
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
      mask->pred_modes[ALTREF_FRAME] = 0;
      disable_inter_references_except_altref(mask->ref_combo);
      disable_reference(INTRA_FRAME, mask->ref_combo);
    }
  }

  if (sf->inter_sf.alt_ref_search_fp) {
    if (!cm->show_frame && x->best_pred_mv_sad < INT_MAX) {
      // Threshold: best SAD plus 12.5% headroom.
      int sad_thresh = x->best_pred_mv_sad + (x->best_pred_mv_sad >> 3);
      // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
      // those are past frames
      for (ref_frame = BWDREF_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
        if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
            0)
          if (x->pred_mv_sad[ref_frame] > sad_thresh)
            mask->pred_modes[ref_frame] |= INTER_ALL;
      }
    }
  }

  // Intra is searched only up to the configured maximum block size.
  if (bsize > sf->part_sf.max_intra_bsize) {
    disable_reference(INTRA_FRAME, mask->ref_combo);
  }

  // Restrict luma intra modes to those allowed for this tx size.
  mask->pred_modes[INTRA_FRAME] |=
      ~(sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]]);
}
3691
init_neighbor_pred_buf(const OBMCBuffer * const obmc_buffer,HandleInterModeArgs * const args,int is_hbd)3692 static AOM_INLINE void init_neighbor_pred_buf(
3693 const OBMCBuffer *const obmc_buffer, HandleInterModeArgs *const args,
3694 int is_hbd) {
3695 if (is_hbd) {
3696 const int len = sizeof(uint16_t);
3697 args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
3698 args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
3699 (MAX_SB_SQUARE >> 1) * len);
3700 args->above_pred_buf[2] =
3701 CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
3702 args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
3703 args->left_pred_buf[1] =
3704 CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
3705 args->left_pred_buf[2] =
3706 CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
3707 } else {
3708 args->above_pred_buf[0] = obmc_buffer->above_pred;
3709 args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
3710 args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
3711 args->left_pred_buf[0] = obmc_buffer->left_pred;
3712 args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
3713 args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
3714 }
3715 }
3716
prune_ref_frame(const AV1_COMP * cpi,const MACROBLOCK * x,MV_REFERENCE_FRAME ref_frame)3717 static AOM_INLINE int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
3718 MV_REFERENCE_FRAME ref_frame) {
3719 const AV1_COMMON *const cm = &cpi->common;
3720 MV_REFERENCE_FRAME rf[2];
3721 av1_set_ref_frame(rf, ref_frame);
3722
3723 if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;
3724
3725 if (prune_ref_by_selective_ref_frame(cpi, x, rf,
3726 cm->cur_frame->ref_display_order_hint)) {
3727 return 1;
3728 }
3729
3730 return 0;
3731 }
3732
is_ref_frame_used_by_compound_ref(int ref_frame,int skip_ref_frame_mask)3733 static AOM_INLINE int is_ref_frame_used_by_compound_ref(
3734 int ref_frame, int skip_ref_frame_mask) {
3735 for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
3736 if (!(skip_ref_frame_mask & (1 << r))) {
3737 const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
3738 if (rf[0] == ref_frame || rf[1] == ref_frame) {
3739 return 1;
3740 }
3741 }
3742 }
3743 return 0;
3744 }
3745
is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,const MB_MODE_INFO * mi_cache)3746 static AOM_INLINE int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
3747 const MB_MODE_INFO *mi_cache) {
3748 if (!mi_cache) {
3749 return 0;
3750 }
3751
3752 if (ref_frame < REF_FRAMES) {
3753 return (ref_frame == mi_cache->ref_frame[0] ||
3754 ref_frame == mi_cache->ref_frame[1]);
3755 }
3756
3757 // if we are here, then the current mode is compound.
3758 MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame);
3759 return ref_frame == cached_ref_type;
3760 }
3761
// Please add/modify parameter setting in this function, making it consistent
// and easy to read and maintain.
//
// Per-block setup for the inter mode RD search: estimates reference costs,
// populates ref mv lists and prediction buffers for every usable reference
// (single and compound), builds the OBMC neighbor predictions when OBMC is
// viable, and initializes the mode skip mask and mode evaluation params.
static AOM_INLINE void set_params_rd_pick_inter_mode(
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
    unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
    struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  unsigned char segment_id = mbmi->segment_id;

  init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
  av1_collect_neighbors_ref_counts(xd);
  estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  x->best_pred_mv_sad = INT_MAX;

  // Single references: reset per-frame state, then set up mv/prediction
  // buffers for every reference that is available and not skipped.
  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
       ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    mbmi_ext->mode_context[ref_frame] = 0;
    // UINT8_MAX marks the ref mv list as "not yet populated".
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
      // Skip the ref frame if the mask says skip and the ref is not used by
      // compound ref.
      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
          !is_ref_frame_used_in_cache(ref_frame, x->intermode_cache)) {
        continue;
      }
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
    }
    // Store the best pred_mv_sad across all past frames
    if (cpi->sf.inter_sf.alt_ref_search_fp &&
        cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] < 0)
      x->best_pred_mv_sad =
          AOMMIN(x->best_pred_mv_sad, x->pred_mv_sad[ref_frame]);
  }

  // Compound reference types: populate their ref mv lists when allowed.
  if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
    // No second reference on RT ref set, so no need to initialize
    for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
         ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
      mbmi_ext->mode_context[ref_frame] = 0;
      mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
      const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
      // Both constituent single references must be available.
      if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
            (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
        continue;
      }

      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_in_cache(ref_frame, x->intermode_cache)) {
        continue;
      }
      // Ref mv list population is not required, when compound references are
      // pruned.
      if (prune_ref_frame(cpi, x, ref_frame)) continue;

      av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                       xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                       mbmi_ext->mode_context);
      // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
      // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
      av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
    }
  }

  // Build OBMC neighbor predictions unless OBMC is disabled or pruned by the
  // frame-level probability threshold.
  av1_count_overlappable_neighbors(cm, xd);
  const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
  const int prune_obmc = cpi->frame_probs.obmc_probs[update_type][bsize] <
                         cpi->sf.inter_sf.prune_obmc_prob_thresh;
  if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
    if (check_num_overlappable_neighbors(mbmi) &&
        is_motion_variation_allowed_bsize(bsize)) {
      int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                       MAX_SB_SIZE >> 1 };
      int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                        MAX_SB_SIZE >> 1 };
      int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
                                          dst_width1, dst_height1,
                                          args->above_pred_stride);
      av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
                                         dst_width2, dst_height2,
                                         args->left_pred_stride);
      const int num_planes = av1_num_planes(cm);
      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
                           mi_col, 0, num_planes);
      calc_target_weighted_pred(
          cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
          args->left_pred_buf[0], args->left_pred_stride[0]);
    }
  }

  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);

  // Set params for mode evaluation
  set_mode_eval_params(cpi, x, MODE_EVAL);

  x->comp_rd_stats_idx = 0;
}
3871
init_inter_mode_search_state(InterModeSearchState * search_state,const AV1_COMP * cpi,const MACROBLOCK * x,BLOCK_SIZE bsize,int64_t best_rd_so_far)3872 static AOM_INLINE void init_inter_mode_search_state(
3873 InterModeSearchState *search_state, const AV1_COMP *cpi,
3874 const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
3875 init_intra_mode_search_state(&search_state->intra_search_state);
3876 av1_invalid_rd_stats(&search_state->best_y_rdcost);
3877
3878 search_state->best_rd = best_rd_so_far;
3879 search_state->best_skip_rd[0] = INT64_MAX;
3880 search_state->best_skip_rd[1] = INT64_MAX;
3881
3882 av1_zero(search_state->best_mbmode);
3883
3884 search_state->best_rate_y = INT_MAX;
3885
3886 search_state->best_rate_uv = INT_MAX;
3887
3888 search_state->best_mode_skippable = 0;
3889
3890 search_state->best_skip2 = 0;
3891
3892 search_state->best_mode_index = THR_INVALID;
3893
3894 const MACROBLOCKD *const xd = &x->e_mbd;
3895 const MB_MODE_INFO *const mbmi = xd->mi[0];
3896 const unsigned char segment_id = mbmi->segment_id;
3897
3898 search_state->num_available_refs = 0;
3899 memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
3900 memset(search_state->dist_order_refs, -1,
3901 sizeof(search_state->dist_order_refs));
3902
3903 for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
3904 search_state->mode_threshold[i] = 0;
3905 const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
3906 for (int i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
3907 search_state->mode_threshold[i] =
3908 ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
3909 RD_THRESH_FAC_FRAC_BITS;
3910
3911 search_state->best_intra_rd = INT64_MAX;
3912
3913 search_state->best_pred_sse = UINT_MAX;
3914
3915 av1_zero(search_state->single_newmv);
3916 av1_zero(search_state->single_newmv_rate);
3917 av1_zero(search_state->single_newmv_valid);
3918 for (int i = 0; i < MB_MODE_COUNT; ++i) {
3919 for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
3920 for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
3921 search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
3922 search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
3923 }
3924 }
3925 }
3926
3927 for (int dir = 0; dir < 2; ++dir) {
3928 for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
3929 for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
3930 SingleInterModeState *state;
3931
3932 state = &search_state->single_state[dir][mode][ref_frame];
3933 state->ref_frame = NONE_FRAME;
3934 state->rd = INT64_MAX;
3935
3936 state = &search_state->single_state_modelled[dir][mode][ref_frame];
3937 state->ref_frame = NONE_FRAME;
3938 state->rd = INT64_MAX;
3939 }
3940 }
3941 }
3942 for (int dir = 0; dir < 2; ++dir) {
3943 for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
3944 for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
3945 search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
3946 }
3947 }
3948 }
3949 for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
3950 search_state->best_single_rd[ref_frame] = INT64_MAX;
3951 search_state->best_single_mode[ref_frame] = MB_MODE_COUNT;
3952 }
3953 av1_zero(search_state->single_state_cnt);
3954 av1_zero(search_state->single_state_modelled_cnt);
3955
3956 for (int i = 0; i < REFERENCE_MODES; ++i) {
3957 search_state->best_pred_rd[i] = INT64_MAX;
3958 }
3959 }
3960
mask_says_skip(const mode_skip_mask_t * mode_skip_mask,const MV_REFERENCE_FRAME * ref_frame,const PREDICTION_MODE this_mode)3961 static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
3962 const MV_REFERENCE_FRAME *ref_frame,
3963 const PREDICTION_MODE this_mode) {
3964 if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
3965 return true;
3966 }
3967
3968 return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
3969 }
3970
// Returns 1 when (curr_mode, ref_frames) is not usable for this block/frame
// configuration (compound disallowed, inter-intra disallowed, ...); 0 when
// the combination may be searched.
static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
                                      BLOCK_SIZE bsize,
                                      PREDICTION_MODE curr_mode,
                                      const MV_REFERENCE_FRAME *ref_frames) {
  // Compound prediction: every condition below must hold.
  if (ref_frames[1] > INTRA_FRAME) {
    if (!is_comp_ref_allowed(bsize)) return 1;
    // The second reference must be among the enabled reference frames.
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]]))
      return 1;

    const AV1_COMMON *const cm = &cpi->common;
    // Compound modes make no sense on intra-only frames or when the frame is
    // restricted to single-reference prediction.
    if (frame_is_intra_only(cm)) return 1;
    if (cm->current_frame.reference_mode == SINGLE_REFERENCE) return 1;

    // Do not allow compound prediction if the segment level reference frame
    // feature is in use as in this case there can only be one reference.
    const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
    if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
  }

  // Inter-intra: both the block size and the prediction mode must allow it.
  const int is_interintra =
      ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME;
  if (is_interintra && (!is_interintra_allowed_bsize(bsize) ||
                        !is_interintra_allowed_mode(curr_mode))) {
    return 1;
  }

  return 0;
}
4003
// Unions the per-mi-unit picked-reference masks over the block's footprint
// inside its superblock and returns the combined mask.
static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
                                        BLOCK_SIZE bsize, int mib_size) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_mask = mib_size - 1;
  // Block position in mi units relative to its superblock origin.
  const int row0 = xd->mi_row & sb_mask;
  const int col0 = xd->mi_col & sb_mask;
  const int rows = mi_size_high[bsize];
  const int cols = mi_size_wide[bsize];
  int picked_mask = 0;
  for (int r = row0; r < row0 + rows; ++r) {
    for (int c = col0; c < col0 + cols; ++c) {
      // NOTE(review): 32 looks like the row stride of picked_ref_frames_mask
      // (mi units per superblock side) — confirm against its declaration.
      picked_mask |= x->picked_ref_frames_mask[r * 32 + c];
    }
  }
  return picked_mask;
}
4022
4023 // Check if reference frame pair of the current block matches with the given
4024 // block.
match_ref_frame_pair(const MB_MODE_INFO * mbmi,const MV_REFERENCE_FRAME * ref_frames)4025 static INLINE int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
4026 const MV_REFERENCE_FRAME *ref_frames) {
4027 return ((ref_frames[0] == mbmi->ref_frame[0]) &&
4028 (ref_frames[1] == mbmi->ref_frame[1]));
4029 }
4030
4031 // Case 1: return 0, means don't skip this mode
4032 // Case 2: return 1, means skip this mode completely
4033 // Case 3: return 2, means skip compound only, but still try single motion modes
static int inter_mode_search_order_independent_skip(
    const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
    InterModeSearchState *search_state, int skip_ref_frame_mask,
    PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
  // Honor the precomputed per-mode / per-ref-combination skip mask.
  if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
    return 1;
  }

  const int ref_type = av1_ref_frame_type(ref_frame);
  if (prune_ref_frame(cpi, x, ref_type)) return 1;

  // This is only used in motion vector unit test.
  if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
      ref_frame[0] == INTRA_FRAME)
    return 1;

  const AV1_COMMON *const cm = &cpi->common;
  // Skip modes whose motion vector would duplicate one already evaluated.
  if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
    return 1;
  }

  // Reuse the prediction mode in cache
  if (x->use_intermode_cache) {
    const MB_MODE_INFO *cached_mi = x->intermode_cache;
    const PREDICTION_MODE cached_mode = cached_mi->mode;
    const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
    const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;

    // If the cached mode is intra, then we just need to match the mode.
    if (is_mode_intra(cached_mode) && mode != cached_mode) {
      return 1;
    }

    // If the cached mode is single inter mode, then we match the mode and
    // reference frame.
    if (cached_mode_is_single) {
      if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
        return 1;
      }
    } else {
      // If the cached mode is compound, then we need to consider several cases.
      const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
      if (mode_is_single) {
        // If the mode is single, we know the modes can't match. But we might
        // still want to search it if compound mode depends on the current mode.
        int skip_motion_mode_only = 0;
        if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
        } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
        } else if (cached_mode == NEW_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
                                   ref_frame[0] == cached_frame[1]);
        }

        // 1 = skip entirely; 2 = keep only the simple-translation search.
        return 1 + skip_motion_mode_only;
      } else {
        // If both modes are compound, then everything must match.
        if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
            ref_frame[1] != cached_frame[1]) {
          return 1;
        }
      }
    }
  }

  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
  // If no valid mode has been found so far in PARTITION_NONE when finding a
  // valid partition is required, do not skip mode.
  if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
      x->must_find_valid_partition)
    return 0;

  const SPEED_FEATURES *const sf = &cpi->sf;
  // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
  // frames
  if (sf->inter_sf.prune_nearmv_using_neighbors &&
      (mode == NEAR_NEARMV || mode == NEARMV)) {
    const MACROBLOCKD *const xd = &x->e_mbd;
    if (search_state->best_rd != INT64_MAX && xd->left_available &&
        xd->up_available) {
      // Threshold steps down from 2 to 0 as qindex rises across QINDEX_RANGE.
      const int num_ref_frame_pair_match_thresh =
          2 - (x->qindex * 3 / QINDEX_RANGE);
      assert(num_ref_frame_pair_match_thresh <= 2 &&
             num_ref_frame_pair_match_thresh >= 0);
      int num_ref_frame_pair_match = 0;

      num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
      num_ref_frame_pair_match +=
          match_ref_frame_pair(xd->above_mbmi, ref_frame);

      // Prune modes if:
      // num_ref_frame_pair_match < 2 for qindex 0 to 85
      // num_ref_frame_pair_match < 1 for qindex 86 to 170
      // No pruning for qindex 171 to 255
      if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
    }
  }

  int skip_motion_mode = 0;
  if (mbmi->partition != PARTITION_NONE) {
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
    if (ref_type <= ALTREF_FRAME && skip_ref) {
      // Since the compound ref modes depends on the motion estimation result of
      // two single ref modes (best mv of single ref modes as the start point),
      // if current single ref mode is marked skip, we need to check if it will
      // be used in compound ref modes.
      if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
        // Found a not skipped compound ref mode which contains current
        // single ref. So this single ref can't be skipped completely
        // Just skip its motion mode search, still try its simple
        // transition mode.
        skip_motion_mode = 1;
        skip_ref = 0;
      }
    }
    // If we are reusing the prediction from cache, and the current frame is
    // required by the cache, then we cannot prune it.
    if (is_ref_frame_used_in_cache(ref_type, x->intermode_cache)) {
      skip_ref = 0;
      // If the cache only needs the current reference type for compound
      // prediction, then we can skip motion mode search.
      skip_motion_mode = (ref_type <= ALTREF_FRAME &&
                          x->intermode_cache->ref_frame[1] > INTRA_FRAME);
    }
    if (skip_ref) return 1;
  }

  if (ref_frame[0] == INTRA_FRAME) {
    if (mode != DC_PRED) {
      // Disable intra modes other than DC_PRED for blocks with low variance
      // Threshold for intra skipping based on source variance
      // TODO(debargha): Specialize the threshold for super block sizes
      const unsigned int skip_intra_var_thresh = 64;
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
          x->source_variance < skip_intra_var_thresh)
        return 1;
    }
  }

  // Case 3: only the motion-mode search is pruned for this ref.
  if (skip_motion_mode) return 2;

  return 0;
}
4178
init_mbmi(MB_MODE_INFO * mbmi,PREDICTION_MODE curr_mode,const MV_REFERENCE_FRAME * ref_frames,const AV1_COMMON * cm)4179 static INLINE void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
4180 const MV_REFERENCE_FRAME *ref_frames,
4181 const AV1_COMMON *cm) {
4182 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4183 mbmi->ref_mv_idx = 0;
4184 mbmi->mode = curr_mode;
4185 mbmi->uv_mode = UV_DC_PRED;
4186 mbmi->ref_frame[0] = ref_frames[0];
4187 mbmi->ref_frame[1] = ref_frames[1];
4188 pmi->palette_size[0] = 0;
4189 pmi->palette_size[1] = 0;
4190 mbmi->filter_intra_mode_info.use_filter_intra = 0;
4191 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4192 mbmi->motion_mode = SIMPLE_TRANSLATION;
4193 mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
4194 set_default_interp_filters(mbmi, cm->features.interp_filter);
4195 }
4196
collect_single_states(MACROBLOCK * x,InterModeSearchState * search_state,const MB_MODE_INFO * const mbmi)4197 static AOM_INLINE void collect_single_states(MACROBLOCK *x,
4198 InterModeSearchState *search_state,
4199 const MB_MODE_INFO *const mbmi) {
4200 int i, j;
4201 const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
4202 const PREDICTION_MODE this_mode = mbmi->mode;
4203 const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
4204 const int mode_offset = INTER_OFFSET(this_mode);
4205 const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
4206
4207 // Simple rd
4208 int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
4209 for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4210 const int64_t rd =
4211 search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
4212 if (rd < simple_rd) simple_rd = rd;
4213 }
4214
4215 // Insertion sort of single_state
4216 const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
4217 SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
4218 i = search_state->single_state_cnt[dir][mode_offset];
4219 for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
4220 state_s[j] = state_s[j - 1];
4221 state_s[j] = this_state_s;
4222 search_state->single_state_cnt[dir][mode_offset]++;
4223
4224 // Modelled rd
4225 int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
4226 for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4227 const int64_t rd =
4228 search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
4229 if (rd < modelled_rd) modelled_rd = rd;
4230 }
4231
4232 // Insertion sort of single_state_modelled
4233 const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
4234 SingleInterModeState *state_m =
4235 search_state->single_state_modelled[dir][mode_offset];
4236 i = search_state->single_state_modelled_cnt[dir][mode_offset];
4237 for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
4238 state_m[j] = state_m[j - 1];
4239 state_m[j] = this_state_m;
4240 search_state->single_state_modelled_cnt[dir][mode_offset]++;
4241 }
4242
// Post-processes the single-mode state lists collected by
// collect_single_states(): (1) invalidates entries whose RD is far worse than
// the best NEWMV/GLOBALMV RD in the same direction, then (2) builds an
// ordered per-(dir, mode) candidate reference list, preferring simple-rd
// ordering and filling remaining slots from modelled-rd ordering.
static AOM_INLINE void analyze_single_states(
    const AV1_COMP *cpi, InterModeSearchState *search_state) {
  const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
  assert(prune_level >= 1);
  int i, j, dir, mode;

  for (dir = 0; dir < 2; ++dir) {
    int64_t best_rd;
    SingleInterModeState(*state)[FWD_REFS];
    // Higher prune level uses a tighter (larger) rejection factor.
    const int prune_factor = prune_level >= 2 ? 6 : 5;

    // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
    // reference frames for all the modes (NEARESTMV and NEARMV may not
    // have same motion vectors). Always keep the best of each mode
    // because it might form the best possible combination with other mode.
    state = search_state->single_state[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      // i starts at 1: the best entry of each mode is always kept.
      for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }

    // Same pruning applied to the modelled-rd lists.
    state = search_state->single_state_modelled[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }
  }

  // Ordering by simple rd first, then by modelled rd
  for (dir = 0; dir < 2; ++dir) {
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
      const int state_cnt_m =
          search_state->single_state_modelled_cnt[dir][mode];
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
      SingleInterModeState *state_m =
          search_state->single_state_modelled[dir][mode];
      int count = 0;
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
      // First pass: take valid refs in simple-rd order.
      for (i = 0; i < state_cnt_s; ++i) {
        if (state_s[i].rd == INT64_MAX) break;
        if (state_s[i].valid) {
          search_state->single_rd_order[dir][mode][count++] =
              state_s[i].ref_frame;
        }
      }
      if (count >= max_candidates) continue;

      // Second pass: fill remaining slots from modelled-rd order, skipping
      // refs already taken or invalidated in the simple-rd list.
      for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
        if (state_m[i].rd == INT64_MAX) break;
        if (!state_m[i].valid) continue;
        const int ref_frame = state_m[i].ref_frame;
        int match = 0;
        // Check if existing already
        for (j = 0; j < count; ++j) {
          if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
            match = 1;
            break;
          }
        }
        if (match) continue;
        // Check if this ref_frame is removed in simple rd
        int valid = 1;
        for (j = 0; j < state_cnt_s; ++j) {
          if (ref_frame == state_s[j].ref_frame) {
            valid = state_s[j].valid;
            break;
          }
        }
        if (valid) {
          search_state->single_rd_order[dir][mode][count++] = ref_frame;
        }
      }
    }
  }
}
4331
compound_skip_get_candidates(const AV1_COMP * cpi,const InterModeSearchState * search_state,const int dir,const PREDICTION_MODE mode)4332 static int compound_skip_get_candidates(
4333 const AV1_COMP *cpi, const InterModeSearchState *search_state,
4334 const int dir, const PREDICTION_MODE mode) {
4335 const int mode_offset = INTER_OFFSET(mode);
4336 const SingleInterModeState *state =
4337 search_state->single_state[dir][mode_offset];
4338 const SingleInterModeState *state_modelled =
4339 search_state->single_state_modelled[dir][mode_offset];
4340
4341 int max_candidates = 0;
4342 for (int i = 0; i < FWD_REFS; ++i) {
4343 if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
4344 max_candidates++;
4345 }
4346
4347 int candidates = max_candidates;
4348 if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
4349 candidates = AOMMIN(2, max_candidates);
4350 }
4351 if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
4352 if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
4353 state[0].ref_frame == state_modelled[0].ref_frame)
4354 candidates = 1;
4355 if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
4356 }
4357
4358 if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
4359 // Limit the number of candidates to 1 in each direction for compound
4360 // prediction
4361 candidates = AOMMIN(1, candidates);
4362 }
4363 return candidates;
4364 }
4365
// Returns 1 when the compound mode (this_mode with {ref_frame,
// second_ref_frame}) can be pruned based on the corresponding
// single-reference search results; 0 otherwise.
static int compound_skip_by_single_states(
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
  // Component single modes of the compound mode (e.g. NEAR_NEWMV ->
  // { NEARMV, NEWMV }).
  const int mode[2] = { compound_ref0_mode(this_mode),
                        compound_ref1_mode(this_mode) };
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
  // Direction of each component ref: 0 = forward, 1 = backward.
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
  int ref_searched[2] = { 0, 0 };
  int ref_mv_match[2] = { 1, 1 };
  int i, j;

  // Determine whether each component single mode was searched for its ref.
  for (i = 0; i < 2; ++i) {
    const SingleInterModeState *state =
        search_state->single_state[mode_dir[i]][mode_offset[i]];
    const int state_cnt =
        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
    for (j = 0; j < state_cnt; ++j) {
      if (state[j].ref_frame == refs[i]) {
        ref_searched[i] = 1;
        break;
      }
    }
  }

  // For NEARESTMV/NEARMV components, require that the single-mode mv equals
  // the mv the compound mode would use, for every ref-mv candidate;
  // otherwise the single-mode RD is not predictive of the compound RD.
  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
      continue;
    }
    const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
      int_mv single_mv;
      int_mv comp_mv;
      get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
                  &x->mbmi_ext);
      get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
      if (single_mv.as_int != comp_mv.as_int) {
        ref_mv_match[i] = 0;
        break;
      }
    }
  }

  // Prune when a searched, mv-matching component ref is not among the top
  // single-mode candidates for its direction.
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || !ref_mv_match[i]) continue;
    const int candidates =
        compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
    const MV_REFERENCE_FRAME *ref_order =
        search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
    int match = 0;
    for (j = 0; j < candidates; ++j) {
      if (refs[i] == ref_order[j]) {
        match = 1;
        break;
      }
    }
    if (!match) return 1;
  }

  return 0;
}
4430
4431 // Check if ref frames of current block matches with given block.
match_ref_frame(const MB_MODE_INFO * const mbmi,const MV_REFERENCE_FRAME * ref_frames,int * const is_ref_match)4432 static INLINE void match_ref_frame(const MB_MODE_INFO *const mbmi,
4433 const MV_REFERENCE_FRAME *ref_frames,
4434 int *const is_ref_match) {
4435 if (is_inter_block(mbmi)) {
4436 is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
4437 is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
4438 if (has_second_ref(mbmi)) {
4439 is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
4440 is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
4441 }
4442 }
4443 }
4444
4445 // Prune compound mode using ref frames of neighbor blocks.
compound_skip_using_neighbor_refs(MACROBLOCKD * const xd,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,int prune_compound_using_neighbors)4446 static INLINE int compound_skip_using_neighbor_refs(
4447 MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
4448 const MV_REFERENCE_FRAME *ref_frames, int prune_compound_using_neighbors) {
4449 // Exclude non-extended compound modes from pruning
4450 if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4451 this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4452 return 0;
4453
4454 int is_ref_match[2] = { 0 }; // 0 - match for forward refs
4455 // 1 - match for backward refs
4456 // Check if ref frames of this block matches with left neighbor.
4457 if (xd->left_available)
4458 match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
4459
4460 // Check if ref frames of this block matches with above neighbor.
4461 if (xd->up_available)
4462 match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
4463
4464 // Combine ref frame match with neighbors in forward and backward refs.
4465 const int track_ref_match = is_ref_match[0] + is_ref_match[1];
4466
4467 // Pruning based on ref frame match with neighbors.
4468 if (track_ref_match >= prune_compound_using_neighbors) return 0;
4469 return 1;
4470 }
4471
4472 // Update best single mode for the given reference frame based on simple rd.
update_best_single_mode(InterModeSearchState * search_state,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME ref_frame,int64_t this_rd)4473 static INLINE void update_best_single_mode(InterModeSearchState *search_state,
4474 const PREDICTION_MODE this_mode,
4475 const MV_REFERENCE_FRAME ref_frame,
4476 int64_t this_rd) {
4477 if (this_rd < search_state->best_single_rd[ref_frame]) {
4478 search_state->best_single_rd[ref_frame] = this_rd;
4479 search_state->best_single_mode[ref_frame] = this_mode;
4480 }
4481 }
4482
4483 // Prune compound mode using best single mode for the same reference.
skip_compound_using_best_single_mode_ref(const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,const PREDICTION_MODE * best_single_mode,int prune_comp_using_best_single_mode_ref)4484 static INLINE int skip_compound_using_best_single_mode_ref(
4485 const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
4486 const PREDICTION_MODE *best_single_mode,
4487 int prune_comp_using_best_single_mode_ref) {
4488 // Exclude non-extended compound modes from pruning
4489 if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4490 this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4491 return 0;
4492
4493 assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
4494 const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
4495 // Get ref frame direction corresponding to NEWMV
4496 // 0 - NEWMV corresponding to forward direction
4497 // 1 - NEWMV corresponding to backward direction
4498 const int newmv_dir = comp_mode_ref0 != NEWMV;
4499
4500 // Avoid pruning the compound mode when ref frame corresponding to NEWMV
4501 // have NEWMV as single mode winner.
4502 // Example: For an extended-compound mode,
4503 // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
4504 // - Ref frame corresponding to NEWMV is ALTREF_FRAME
4505 // - Avoid pruning this mode, if best single mode corresponding to ref frame
4506 // ALTREF_FRAME is NEWMV
4507 const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
4508 if (single_mode == NEWMV) return 0;
4509
4510 // Avoid pruning the compound mode when best single mode is not available
4511 if (prune_comp_using_best_single_mode_ref == 1)
4512 if (single_mode == MB_MODE_COUNT) return 0;
4513 return 1;
4514 }
4515
// qsort comparator for int64_t values in ascending order.
static int compare_int64(const void *a, const void *b) {
  const int64_t lhs = *(const int64_t *)a;
  const int64_t rhs = *(const int64_t *)b;
  // Branch-free three-way compare: -1, 0, or 1.
  return (lhs > rhs) - (lhs < rhs);
}
4527
// Copies the stats of a newly found best mode (overall, luma-only, and
// chroma-only RD stats plus transform decisions) into search_state,
// best_rd_stats_dst, and the pick-mode context.
static INLINE void update_search_state(
    InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
    PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
    const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
    THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  // skip_txfm is only honored for inter modes.
  const int skip_txfm =
      mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);
  const TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  search_state->best_rd = new_best_rd_stats->rdcost;
  search_state->best_mode_index = new_best_mode;
  *best_rd_stats_dst = *new_best_rd_stats;
  search_state->best_mbmode = *mbmi;
  search_state->best_skip2 = skip_txfm;
  search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
  // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
  // rate_uv because av1_txfm_search process is replaced by rd estimation.
  // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
  // These two values will be updated when av1_txfm_search is called.
  if (txfm_search_done) {
    search_state->best_rate_y =
        new_best_rd_stats_y->rate +
        x->mode_costs.skip_txfm_cost[skip_ctx]
                                    [new_best_rd_stats->skip_txfm || skip_txfm];
    search_state->best_rate_uv = new_best_rd_stats_uv->rate;
  }
  search_state->best_y_rdcost = *new_best_rd_stats_y;
  // Snapshot the per-4x4 transform-skip decisions and tx types of the winner.
  memcpy(ctx->blk_skip, txfm_info->blk_skip,
         sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
4562
4563 // Find the best RD for a reference frame (among single reference modes)
4564 // and store +10% of it in the 0-th element in ref_frame_rd.
find_top_ref(int64_t ref_frame_rd[REF_FRAMES])4565 static AOM_INLINE void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
4566 assert(ref_frame_rd[0] == INT64_MAX);
4567 int64_t ref_copy[REF_FRAMES - 1];
4568 memcpy(ref_copy, ref_frame_rd + 1,
4569 sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
4570 qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
4571
4572 int64_t cutoff = ref_copy[0];
4573 // The cut-off is within 10% of the best.
4574 if (cutoff != INT64_MAX) {
4575 assert(cutoff < INT64_MAX / 200);
4576 cutoff = (110 * cutoff) / 100;
4577 }
4578 ref_frame_rd[0] = cutoff;
4579 }
4580
4581 // Check if either frame is within the cutoff.
in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],MV_REFERENCE_FRAME frame1,MV_REFERENCE_FRAME frame2)4582 static INLINE bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
4583 MV_REFERENCE_FRAME frame1,
4584 MV_REFERENCE_FRAME frame2) {
4585 assert(frame2 > 0);
4586 return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
4587 ref_frame_rd[frame2] <= ref_frame_rd[0];
4588 }
4589
// Runs the full motion mode search (beyond SIMPLE_TRANSLATION) on each of the
// simple-translation winner candidates collected earlier, and updates
// search_state / rd_cost / ctx whenever a candidate beats the current best RD.
static AOM_INLINE void evaluate_motion_mode_for_winner_candidates(
    const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
    HandleInterModeArgs *const args, TileDataEnc *const tile_data,
    PICK_MODE_CONTEXT *const ctx,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    const motion_mode_best_st_candidate *const best_motion_mode_cands,
    int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
    InterModeSearchState *const search_state, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *const inter_modes_info = x->inter_modes_info;
  const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;

  for (int cand = 0; cand < num_best_cand; cand++) {
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    av1_init_rd_stats(&rd_stats);
    av1_init_rd_stats(&rd_stats_y);
    av1_init_rd_stats(&rd_stats_uv);
    int rate_mv;

    // Restore the candidate's mode info and the rates recorded when it was
    // inserted into the winner list.
    rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
    args->skip_motion_mode =
        best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
    *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
    rd_stats.rate =
        best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;

    // Continue if the best candidate is compound.
    if (!is_inter_singleref_mode(mbmi->mode)) continue;

    x->txfm_search_info.skip_txfm = 0;
    // Snapshot the current destination buffers so motion_mode_rd() can
    // restore/operate on the original prediction destination.
    struct macroblockd_plane *p = xd->plane;
    const BUFFER_SET orig_dst = {
      { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
      { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
    };

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    // Initialize motion mode to simple translation
    // Calculation of switchable rate depends on it.
    mbmi->motion_mode = 0;
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    // Point the plane prediction buffers at this candidate's reference frames.
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    // Seed skip-RD thresholds from the best seen so far; motion_mode_rd() may
    // update skip_rd[0] for the winning candidate below.
    int64_t skip_rd[2] = { search_state->best_skip_rd[0],
                           search_state->best_skip_rd[1] };
    int64_t this_yrd = INT64_MAX;
    // A return value of INT64_MAX means no usable RD was produced for this
    // candidate and it is dropped.
    int64_t ret_value = motion_mode_rd(
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
        search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
        do_tx_search, inter_modes_info, 1, &this_yrd);

    if (ret_value != INT64_MAX) {
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(
          &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
          mode_enum, NULL, bsize, rd_stats.rdcost,
          cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
      if (rd_stats.rdcost < search_state->best_rd) {
        *yrd = this_yrd;
        update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                            &rd_stats_uv, mode_enum, x, do_tx_search);
        if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
      }
    }
  }
}
4667
/*!\cond */
// Arguments for speed feature pruning of inter mode search
typedef struct {
  // Output flag: set when only the motion mode part of the search should be
  // skipped for the current mode (see skip_inter_mode()).
  int *skip_motion_mode;
  // Per-mode / per-ref-frame skip masks computed ahead of the mode loop.
  mode_skip_mask_t *mode_skip_mask;
  // Overall inter mode search state (best RD so far, thresholds, etc.).
  InterModeSearchState *search_state;
  // Bitmask of reference frames to skip.
  int skip_ref_frame_mask;
  // Set once the first compound prediction mode has been reached, after the
  // single-reference statistics have been analyzed for pruning.
  int reach_first_comp_mode;
  // Multiplier (MODE_THRESH_QBITS fixed point) applied to the per-mode RD
  // threshold when the best mode so far is skippable.
  int mode_thresh_mul_fact;
  // Intra modes encountered during the inter mode loop, deferred to a later
  // intra-only search pass.
  int intra_mode_idx_ls[INTRA_MODES];
  // Number of valid entries in intra_mode_idx_ls.
  int intra_mode_num;
  // Number of single reference modes already processed (maintained by the
  // caller); compared against NUM_SINGLE_REF_MODES to decide when compound
  // pruning stats are complete.
  int num_single_modes_processed;
  // Set once find_top_ref() has computed the single-reference RD cut-off.
  int prune_cpd_using_sr_stats_ready;
} InterModeSFArgs;
/*!\endcond */
4683
// Decides whether the mode indexed by midx should be skipped during the inter
// mode search. Returns 1 when the mode is to be skipped, 0 otherwise.
// Side effects: may set *args->skip_motion_mode, run analyze_single_states()
// when the first compound mode is reached, defer intra modes into
// args->intra_mode_idx_ls, and compute the single-reference RD cut-off.
static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
                           int64_t *ref_frame_rd, int midx,
                           InterModeSFArgs *args) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  // Get the actual prediction mode we are trying in this iteration
  const THR_MODES mode_enum = av1_default_mode_order[midx];
  const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
  const PREDICTION_MODE this_mode = mode_def->mode;
  const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
  const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
  const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
  const int comp_pred = second_ref_frame > INTRA_FRAME;

  // Check if this mode should be skipped because it is incompatible with the
  // current frame
  if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
    return 1;
  // ret == 1 means skip the mode entirely; ret == 2 keeps the mode but sets
  // the skip_motion_mode flag.
  const int ret = inter_mode_search_order_independent_skip(
      cpi, x, args->mode_skip_mask, args->search_state,
      args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
  if (ret == 1) return 1;
  *(args->skip_motion_mode) = (ret == 2);

  // We've reached the first compound prediction mode, get stats from the
  // single reference predictors to help with pruning
  if (sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
      args->reach_first_comp_mode == 0) {
    analyze_single_states(cpi, args->search_state);
    args->reach_first_comp_mode = 1;
  }

  // Prune aggressively when best mode is skippable.
  int mul_fact = args->search_state->best_mode_skippable
                     ? args->mode_thresh_mul_fact
                     : (1 << MODE_THRESH_QBITS);
  // mul_fact is in MODE_THRESH_QBITS fixed point; shift back down after the
  // multiply to get the scaled threshold.
  int64_t mode_threshold =
      (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
      MODE_THRESH_QBITS;

  // If the best RD found so far is already below this mode's threshold,
  // evaluating the mode cannot help.
  if (args->search_state->best_rd < mode_threshold) return 1;

  // Skip this compound mode based on the RD results from the single prediction
  // modes
  if (sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
    if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
                                       ref_frame, second_ref_frame, x))
      return 1;
  }

  // Speed features to prune out INTRA frames
  if (ref_frame == INTRA_FRAME) {
    // Drop smooth intra modes when disabled by configuration or speed feature.
    if ((!cpi->oxcf.intra_mode_cfg.enable_smooth_intra ||
         sf->intra_sf.disable_smooth_intra) &&
        (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
         mbmi->mode == SMOOTH_V_PRED))
      return 1;
    if (!cpi->oxcf.intra_mode_cfg.enable_paeth_intra &&
        mbmi->mode == PAETH_PRED)
      return 1;

    // Intra modes will be handled in another loop later.
    assert(args->intra_mode_num < INTRA_MODES);
    args->intra_mode_idx_ls[args->intra_mode_num++] = mode_enum;
    return 1;
  }

  if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
    // After we done with single reference modes, find the 2nd best RD
    // for a reference frame. Only search compound modes that have a reference
    // frame at least as good as the 2nd best.
    if (!args->prune_cpd_using_sr_stats_ready &&
        args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
      find_top_ref(ref_frame_rd);
      args->prune_cpd_using_sr_stats_ready = 1;
    }
    if (args->prune_cpd_using_sr_stats_ready &&
        !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
      return 1;
  }

  if (sf->inter_sf.prune_compound_using_neighbors && comp_pred) {
    if (compound_skip_using_neighbor_refs(
            xd, this_mode, ref_frames,
            sf->inter_sf.prune_compound_using_neighbors))
      return 1;
  }

  if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
    if (skip_compound_using_best_single_mode_ref(
            this_mode, ref_frames, args->search_state->best_single_mode,
            sf->inter_sf.prune_comp_using_best_single_mode_ref))
      return 1;
  }

  return 0;
}
4782
// Updates search_state->best_pred_rd[] with the RD cost of the current mode,
// evaluated both without (single) and with (hybrid) the compound-mode-select
// signaling cost.
static void record_best_compound(REFERENCE_MODE reference_mode,
                                 RD_STATS *rd_stats, int comp_pred, int rdmult,
                                 InterModeSearchState *search_state,
                                 int compmode_cost) {
  // Derive the rate with and without the reference-mode signaling cost.
  int64_t single_rate;
  int64_t hybrid_rate;
  if (reference_mode == REFERENCE_MODE_SELECT) {
    single_rate = rd_stats->rate - compmode_cost;
    hybrid_rate = rd_stats->rate;
  } else {
    single_rate = rd_stats->rate;
    hybrid_rate = rd_stats->rate + compmode_cost;
  }

  const int64_t single_rd = RDCOST(rdmult, single_rate, rd_stats->dist);
  const int64_t hybrid_rd = RDCOST(rdmult, hybrid_rate, rd_stats->dist);

  // Track the best single-signaled RD in the slot matching the prediction
  // type of this mode, and the best hybrid RD in the mode-select slot.
  const int pred_type = comp_pred ? COMPOUND_REFERENCE : SINGLE_REFERENCE;
  if (single_rd < search_state->best_pred_rd[pred_type])
    search_state->best_pred_rd[pred_type] = single_rd;
  if (hybrid_rd < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
    search_state->best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
}
4810
// Does a transform search over a list of the best inter mode candidates.
// This is called if the original mode search computed an RD estimate
// for the transform search rather than doing a full search.
static void tx_search_best_inter_candidates(
    AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    int64_t best_rd_so_far, BLOCK_SIZE bsize,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
    InterModeSearchState *search_state, RD_STATS *rd_cost,
    PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const ModeCosts *mode_costs = &x->mode_costs;
  const int num_planes = av1_num_planes(cm);
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *inter_modes_info = x->inter_modes_info;
  // Order candidates by estimated RD before the transform search.
  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
  search_state->best_rd = best_rd_so_far;
  search_state->best_mode_index = THR_INVALID;
  // Initialize best mode stats for winner mode processing
  x->winner_mode_count = 0;
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
                          NULL, bsize, best_rd_so_far,
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
  // Cap the number of candidates per the real-time speed feature.
  inter_modes_info->num =
      inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
          ? inter_modes_info->num
          : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
  const int64_t top_est_rd =
      inter_modes_info->num > 0
          ? inter_modes_info
                ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
          : INT64_MAX;
  *yrd = INT64_MAX;
  int64_t best_rd_in_this_partition = INT64_MAX;
  // Iterate over best inter mode candidates and perform tx search
  for (int j = 0; j < inter_modes_info->num; ++j) {
    const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
    int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
    // Stop once the estimated RD is more than 25% worse than the best
    // estimate (the list is sorted, so later candidates are no better).
    if (curr_est_rd * 0.80 > top_est_rd) break;

    txfm_info->skip_txfm = 0;
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Select prediction reference frames.
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    // Build the prediction for this mode
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                  av1_num_planes(cm) - 1);
    if (mbmi->motion_mode == OBMC_CAUSAL) {
      av1_build_obmc_inter_predictors_sb(cm, xd);
    }

    // Initialize RD stats
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
    int64_t skip_rd = INT64_MAX;
    if (cpi->sf.inter_sf.txfm_rd_gate_level) {
      // Check if the mode is good enough based on skip RD
      int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
      skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
      int eval_txfm =
          check_txfm_eval(x, bsize, search_state->best_skip_rd[0], skip_rd,
                          cpi->sf.inter_sf.txfm_rd_gate_level, 0);
      if (!eval_txfm) continue;
    }

    int64_t this_yrd = INT64_MAX;
    // Do the transform search
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
                         mode_rate, search_state->best_rd)) {
      continue;
    } else {
      // Luma-only RD: either the cost of signaling skip, or the luma token
      // rate plus the cost of signaling no-skip.
      const int y_rate =
          rd_stats.skip_txfm
              ? mode_costs->skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);

      // Feed the observed rate/distortion back into the RD model.
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
            rd_stats_y.rate + rd_stats_uv.rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }
    rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
    // Track the best luma RD of this partition for the caller.
    if (rd_stats.rdcost < best_rd_in_this_partition) {
      best_rd_in_this_partition = rd_stats.rdcost;
      *yrd = this_yrd;
    }

    const THR_MODES mode_enum = get_prediction_mode_idx(
        mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Collect mode stats for multiwinner mode processing
    const int txfm_search_done = 1;
    store_winner_mode_stats(
        &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
        NULL, bsize, rd_stats.rdcost,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);

    if (rd_stats.rdcost < search_state->best_rd) {
      update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                          &rd_stats_uv, mode_enum, x, txfm_search_done);
      search_state->best_skip_rd[0] = skip_rd;
    }
  }
}
4929
// Indicates number of winner simple translation modes to be used for
// exhaustive motion mode evaluation, indexed by the
// winner_mode_sf.motion_mode_for_winner_cand speed feature level
// (0 disables the pass).
static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
4932
// Adds a motion mode to the candidate list for motion_mode_for_winner_cand
// speed feature. This list consists of modes that have only searched
// SIMPLE_TRANSLATION. The final list will be used to search other motion
// modes after the initial RD search.
static void handle_winner_cand(
    MB_MODE_INFO *const mbmi,
    motion_mode_best_st_candidate *best_motion_mode_cands,
    int max_winner_motion_mode_cand, int64_t this_rd,
    motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
  // Number of current motion mode candidates in list
  const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
  // Default to appending at the end if no better slot is found below.
  int valid_motion_mode_cand_loc = num_motion_mode_cand;

  // find the best location to insert new motion mode candidate
  // (the list is kept sorted in ascending order of rd_cost)
  for (int j = 0; j < num_motion_mode_cand; j++) {
    if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
      valid_motion_mode_cand_loc = j;
      break;
    }
  }

  // Insert motion mode if location is found
  if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
    if (num_motion_mode_cand > 0 &&
        valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
      // Shift the worse candidates down one slot to open the insertion
      // position; when the list is at capacity the last entry is dropped.
      memmove(
          &best_motion_mode_cands
               ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
          &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
          (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
           valid_motion_mode_cand_loc) *
              sizeof(best_motion_mode_cands->motion_mode_cand[0]));
    motion_mode_cand->mbmi = *mbmi;
    motion_mode_cand->rd_cost = this_rd;
    motion_mode_cand->skip_motion_mode = skip_motion_mode;
    best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
        *motion_mode_cand;
    // Grow the count, clamped to the list capacity.
    best_motion_mode_cands->num_motion_mode_cand =
        AOMMIN(max_winner_motion_mode_cand,
               best_motion_mode_cands->num_motion_mode_cand + 1);
  }
}
4975
4976 /*!\brief Search intra modes in interframes
4977 *
4978 * \ingroup intra_mode_search
4979 *
4980 * This function searches for the best intra mode when the current frame is an
4981 * interframe. The list of luma intra mode candidates to be searched are stored
4982 * in InterModeSFArgs::intra_mode_idx_ls. This function however does *not*
4983 * handle luma palette mode. Palette mode is currently handled by \ref
4984 * av1_search_palette_mode.
4985 *
4986 * This function will first iterate through the luma mode candidates to find the
 * best luma intra mode. Once the best luma mode is found, it will then search
4988 * for the best chroma mode. Because palette mode is currently not handled by
4989 * here, a cache of uv mode is stored in
4990 * InterModeSearchState::intra_search_state so it can be reused later by \ref
4991 * av1_search_palette_mode.
4992 *
 * \remark The function does not return a value; the corresponding entries in
 * x->e_mbd.mi[0], rd_stats, rd_stats_y/uv, and best_intra_rd are updated in
 * place. Moreover, in the first invocation of the function, the chroma intra
 * mode result is cached in intra_search_state to be used in subsequent calls.
 * In the first evaluation with directional mode, a prune_mask computed with
 * histogram of gradient is also stored in intra_search_state.
5000 *
5001 * \param[in,out] search_state Struct keep track of the prediction mode
5002 * search state in interframe.
5003 *
5004 * \param[in] cpi Top-level encoder structure.
5005 * \param[in] x Pointer to struct holding all the data for
5006 * the current prediction block.
5007 * \param[out] rd_cost Stores the best rd_cost among all the
5008 * prediction modes searched.
5009 * \param[in] bsize Current block size.
5010 * \param[in,out] ctx Structure to hold the number of 4x4 blks to
5011 * copy the tx_type and txfm_skip arrays.
5012 * for only the Y plane.
5013 * \param[in,out] sf_args Stores the list of intra mode candidates
5014 * to be searched.
5015 * \param[in] intra_ref_frame_cost The entropy cost for signaling that the
5016 * current ref frame is an intra frame.
5017 * \param[in] yrd_threshold The rdcost threshold for luma intra mode to
5018 * terminate chroma intra mode search.
5019 *
 * \remark The best mode and its RD stats are committed via
 * update_search_state() only when an intra mode beats the current best RD;
 * otherwise search_state and rd_cost are left unchanged.
5023 */
static AOM_INLINE void search_intra_modes_in_interframe(
    InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
    InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
    int64_t yrd_threshold) {
  const AV1_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  IntraModeSearchState *intra_search_state = &search_state->intra_search_state;

  // Running best over the luma candidates, including the txfm maps needed to
  // restore the winner after the loop.
  int is_best_y_mode_intra = 0;
  RD_STATS best_intra_rd_stats_y;
  int64_t best_rd_y = INT64_MAX;
  int best_mode_cost_y = -1;
  MB_MODE_INFO best_mbmi = *xd->mi[0];
  THR_MODES best_mode_enum = THR_INVALID;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int num_4x4 = bsize_to_num_blk(bsize);

  // Performs luma search
  for (int j = 0; j < sf_args->intra_mode_num; ++j) {
    // Stop entirely if a speed feature decided intra modes can be skipped.
    if (sf->intra_sf.skip_intra_in_interframe &&
        search_state->intra_search_state.skip_intra_modes)
      break;
    const THR_MODES mode_enum = sf_args->intra_mode_idx_ls[j];
    const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
    const PREDICTION_MODE this_mode = mode_def->mode;

    assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
    assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
    init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
    x->txfm_search_info.skip_txfm = 0;

    if (this_mode != DC_PRED) {
      // Only search the oblique modes if the best so far is
      // one of the neighboring directional modes
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
          (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
        if (search_state->best_mode_index != THR_INVALID &&
            search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
          continue;
      }
      if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(
                this_mode, search_state->intra_search_state.best_intra_mode))
          continue;
      }
    }

    RD_STATS intra_rd_stats_y;
    int mode_cost_y;
    int64_t intra_rd_y = INT64_MAX;
    const int is_luma_result_valid = av1_handle_intra_y_mode(
        intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
        &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y);
    // Only keep modes whose luma RD beats yrd_threshold (the luma RD gate
    // supplied by the caller).
    if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
      is_best_y_mode_intra = 1;
      if (intra_rd_y < best_rd_y) {
        // New best luma mode: snapshot its stats, mode info and txfm maps.
        best_intra_rd_stats_y = intra_rd_stats_y;
        best_mode_cost_y = mode_cost_y;
        best_rd_y = intra_rd_y;
        best_mbmi = *mbmi;
        best_mode_enum = mode_enum;
        memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
               sizeof(best_blk_skip[0]) * num_4x4);
        av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
      }
    }
  }

  // No luma intra mode passed the gate: nothing to do.
  if (!is_best_y_mode_intra) {
    return;
  }

  assert(best_rd_y < INT64_MAX);

  // Restores the best luma mode
  *mbmi = best_mbmi;
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * num_4x4);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);

  // Performs chroma search
  RD_STATS intra_rd_stats, intra_rd_stats_uv;
  av1_init_rd_stats(&intra_rd_stats);
  av1_init_rd_stats(&intra_rd_stats_uv);
  const int num_planes = av1_num_planes(cm);
  if (num_planes > 1) {
    const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
        intra_search_state, cpi, x, bsize, &intra_rd_stats,
        &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);

    if (!intra_uv_mode_valid) {
      return;
    }
  }

  // Merge the luma and chroma rd stats
  assert(best_mode_cost_y >= 0);
  intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
    // (prediction granularity), so we account for it in the full rate,
    // not the tokenonly rate.
    best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
  }

  const ModeCosts *mode_costs = &x->mode_costs;
  const PREDICTION_MODE mode = mbmi->mode;
  if (num_planes > 1 && xd->is_chroma_ref) {
    // Add chroma mode signaling and token costs.
    const int uv_mode_cost =
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
    intra_rd_stats.rate +=
        intra_rd_stats_uv.rate +
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
  }
  if (mode != DC_PRED && mode != PAETH_PRED) {
    // Penalize the remaining intra modes based on the quantizer settings.
    const int intra_cost_penalty = av1_get_intra_cost_penalty(
        cm->quant_params.base_qindex, cm->quant_params.y_dc_delta_q,
        cm->seq_params.bit_depth);
    intra_rd_stats.rate += intra_cost_penalty;
  }

  // Intra block is always coded as non-skip
  intra_rd_stats.skip_txfm = 0;
  intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
  // Add in the cost of the no skip flag.
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
  // Calculate the final RD estimate for this mode.
  const int64_t this_rd =
      RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
  // Keep record of best intra rd
  if (this_rd < search_state->best_intra_rd) {
    search_state->best_intra_rd = this_rd;
    intra_search_state->best_intra_mode = mode;
  }

  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] =
        AOMMIN(search_state->best_pred_rd[i], this_rd);
  }

  intra_rd_stats.rdcost = this_rd;

  // Collect mode stats for multiwinner mode processing
  const int txfm_search_done = 1;
  store_winner_mode_stats(
      &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
      &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
      cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
  if (intra_rd_stats.rdcost < search_state->best_rd) {
    update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
                        &best_intra_rd_stats_y, &intra_rd_stats_uv,
                        best_mode_enum, x, txfm_search_done);
  }
}
5184
5185 // TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb.
av1_rd_pick_inter_mode(struct AV1_COMP * cpi,struct TileDataEnc * tile_data,struct macroblock * x,struct RD_STATS * rd_cost,BLOCK_SIZE bsize,PICK_MODE_CONTEXT * ctx,int64_t best_rd_so_far)5186 void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
5187 struct macroblock *x, struct RD_STATS *rd_cost,
5188 BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
5189 int64_t best_rd_so_far) {
5190 AV1_COMMON *const cm = &cpi->common;
5191 const FeatureFlags *const features = &cm->features;
5192 const int num_planes = av1_num_planes(cm);
5193 const SPEED_FEATURES *const sf = &cpi->sf;
5194 MACROBLOCKD *const xd = &x->e_mbd;
5195 MB_MODE_INFO *const mbmi = xd->mi[0];
5196 TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5197 int i;
5198 const ModeCosts *mode_costs = &x->mode_costs;
5199 const int *comp_inter_cost =
5200 mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
5201
5202 InterModeSearchState search_state;
5203 init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
5204 INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
5205 INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
5206 INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
5207 };
5208 HandleInterModeArgs args = { { NULL },
5209 { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
5210 { NULL },
5211 { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
5212 MAX_SB_SIZE >> 1 },
5213 NULL,
5214 NULL,
5215 NULL,
5216 search_state.modelled_rd,
5217 INT_MAX,
5218 INT_MAX,
5219 search_state.simple_rd,
5220 0,
5221 interintra_modes,
5222 { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
5223 0,
5224 -1,
5225 -1,
5226 -1,
5227 { 0 } };
5228 for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
5229 // Indicates the appropriate number of simple translation winner modes for
5230 // exhaustive motion mode evaluation
5231 const int max_winner_motion_mode_cand =
5232 num_winner_motion_modes[cpi->sf.winner_mode_sf
5233 .motion_mode_for_winner_cand];
5234 assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
5235 motion_mode_candidate motion_mode_cand;
5236 motion_mode_best_st_candidate best_motion_mode_cands;
5237 // Initializing the number of motion mode candidates to zero.
5238 best_motion_mode_cands.num_motion_mode_cand = 0;
5239 for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
5240 best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;
5241
5242 for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
5243
5244 av1_invalid_rd_stats(rd_cost);
5245
5246 for (i = 0; i < REF_FRAMES; ++i) {
5247 x->warp_sample_info[i].num = -1;
5248 }
5249
5250 // Ref frames that are selected by square partition blocks.
5251 int picked_ref_frames_mask = 0;
5252 if (cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions &&
5253 mbmi->partition != PARTITION_NONE) {
5254 // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
5255 // partition blocks. prune_ref_frame_for_rect_partitions >=2
5256 // implies prune for vert, horiz and extended partition blocks.
5257 if ((mbmi->partition != PARTITION_VERT &&
5258 mbmi->partition != PARTITION_HORZ) ||
5259 cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
5260 picked_ref_frames_mask =
5261 fetch_picked_ref_frames_mask(x, bsize, cm->seq_params.mib_size);
5262 }
5263 }
5264
5265 // Skip ref frames that never selected by square blocks.
5266 const int skip_ref_frame_mask =
5267 picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
5268 mode_skip_mask_t mode_skip_mask;
5269 unsigned int ref_costs_single[REF_FRAMES];
5270 unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
5271 struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
5272 // init params, set frame modes, speed features
5273 set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
5274 skip_ref_frame_mask, ref_costs_single,
5275 ref_costs_comp, yv12_mb);
5276
5277 int64_t best_est_rd = INT64_MAX;
5278 const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
5279 // If do_tx_search is 0, only estimated RD should be computed.
5280 // If do_tx_search is 1, all modes have TX search performed.
5281 const int do_tx_search =
5282 !((cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
5283 (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 &&
5284 num_pels_log2_lookup[bsize] > 8) ||
5285 cpi->sf.rt_sf.force_tx_search_off);
5286 InterModesInfo *inter_modes_info = x->inter_modes_info;
5287 inter_modes_info->num = 0;
5288
5289 // Temporary buffers used by handle_inter_mode().
5290 uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
5291
5292 // The best RD found for the reference frame, among single reference modes.
5293 // Note that the 0-th element will contain a cut-off that is later used
5294 // to determine if we should skip a compound mode.
5295 int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
5296 INT64_MAX, INT64_MAX, INT64_MAX,
5297 INT64_MAX, INT64_MAX };
5298
5299 // Prepared stats used later to check if we could skip intra mode eval.
5300 int64_t inter_cost = -1;
5301 int64_t intra_cost = -1;
5302 // Need to tweak the threshold for hdres speed 0 & 1.
5303 const int mi_row = xd->mi_row;
5304 const int mi_col = xd->mi_col;
5305
5306 // Obtain the relevant tpl stats for pruning inter modes
5307 PruneInfoFromTpl inter_cost_info_from_tpl;
5308 #if !CONFIG_REALTIME_ONLY
5309 if (cpi->sf.inter_sf.prune_inter_modes_based_on_tpl) {
5310 // x->tpl_keep_ref_frame[id] = 1 => no pruning in
5311 // prune_ref_by_selective_ref_frame()
5312 // x->tpl_keep_ref_frame[id] = 0 => ref frame can be pruned in
5313 // prune_ref_by_selective_ref_frame()
5314 // Populating valid_refs[idx] = 1 ensures that
5315 // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
5316 // pruned ref frame.
5317 int valid_refs[INTER_REFS_PER_FRAME];
5318 for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
5319 const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
5320 valid_refs[frame - 1] =
5321 x->tpl_keep_ref_frame[frame] ||
5322 !prune_ref_by_selective_ref_frame(
5323 cpi, x, refs, cm->cur_frame->ref_display_order_hint);
5324 }
5325 av1_zero(inter_cost_info_from_tpl);
5326 get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
5327 &inter_cost_info_from_tpl);
5328 }
5329 #endif
5330 const int do_pruning =
5331 (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
5332 if (do_pruning && sf->intra_sf.skip_intra_in_interframe) {
5333 // Only consider full SB.
5334 const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
5335 const int tpl_bsize_1d = cpi->tpl_data.tpl_bsize_1d;
5336 const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
5337 (block_size_high[sb_size] / tpl_bsize_1d);
5338 SuperBlockEnc *sb_enc = &x->sb_enc;
5339 if (sb_enc->tpl_data_count == len) {
5340 const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
5341 const int tpl_stride = sb_enc->tpl_stride;
5342 const int tplw = mi_size_wide[tpl_bsize];
5343 const int tplh = mi_size_high[tpl_bsize];
5344 const int nw = mi_size_wide[bsize] / tplw;
5345 const int nh = mi_size_high[bsize] / tplh;
5346 if (nw >= 1 && nh >= 1) {
5347 const int of_h = mi_row % mi_size_high[sb_size];
5348 const int of_w = mi_col % mi_size_wide[sb_size];
5349 const int start = of_h / tplh * tpl_stride + of_w / tplw;
5350
5351 for (int k = 0; k < nh; k++) {
5352 for (int l = 0; l < nw; l++) {
5353 inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
5354 intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
5355 }
5356 }
5357 inter_cost /= nw * nh;
5358 intra_cost /= nw * nh;
5359 }
5360 }
5361 }
5362
5363 // Initialize best mode stats for winner mode processing
5364 av1_zero(x->winner_mode_stats);
5365 x->winner_mode_count = 0;
5366 store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
5367 NULL, bsize, best_rd_so_far,
5368 cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
5369
5370 int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
5371 if (sf->inter_sf.prune_inter_modes_if_skippable) {
5372 // Higher multiplication factor values for lower quantizers.
5373 mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
5374 }
5375
5376 // Initialize arguments for mode loop speed features
5377 InterModeSFArgs sf_args = { &args.skip_motion_mode,
5378 &mode_skip_mask,
5379 &search_state,
5380 skip_ref_frame_mask,
5381 0,
5382 mode_thresh_mul_fact,
5383 { 0 },
5384 0,
5385 0,
5386 0 };
5387 int64_t best_inter_yrd = INT64_MAX;
5388
5389 // This is the main loop of this function. It loops over all possible modes
5390 // and calls handle_inter_mode() to compute the RD for each.
5391 // Here midx is just an iterator index that should not be used by itself
5392 // except to keep track of the number of modes searched. It should be used
5393 // with av1_default_mode_order to get the enum that defines the mode, which
5394 // can be used with av1_mode_defs to get the prediction mode and the ref
5395 // frames.
5396 for (THR_MODES midx = THR_MODE_START; midx < THR_MODE_END; ++midx) {
5397 // Get the actual prediction mode we are trying in this iteration
5398 const THR_MODES mode_enum = av1_default_mode_order[midx];
5399 const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
5400 const PREDICTION_MODE this_mode = mode_def->mode;
5401 const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
5402
5403 const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
5404 const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
5405 const int is_single_pred =
5406 ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
5407 const int comp_pred = second_ref_frame > INTRA_FRAME;
5408
5409 init_mbmi(mbmi, this_mode, ref_frames, cm);
5410
5411 txfm_info->skip_txfm = 0;
5412 sf_args.num_single_modes_processed += is_single_pred;
5413 set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
5414
5415 // Apply speed features to decide if this inter mode can be skipped
5416 if (skip_inter_mode(cpi, x, bsize, ref_frame_rd, midx, &sf_args)) continue;
5417
5418 // Select prediction reference frames.
5419 for (i = 0; i < num_planes; i++) {
5420 xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
5421 if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
5422 }
5423
5424 mbmi->angle_delta[PLANE_TYPE_Y] = 0;
5425 mbmi->angle_delta[PLANE_TYPE_UV] = 0;
5426 mbmi->filter_intra_mode_info.use_filter_intra = 0;
5427 mbmi->ref_mv_idx = 0;
5428
5429 const int64_t ref_best_rd = search_state.best_rd;
5430 RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
5431 av1_init_rd_stats(&rd_stats);
5432
5433 const int ref_frame_cost = comp_pred
5434 ? ref_costs_comp[ref_frame][second_ref_frame]
5435 : ref_costs_single[ref_frame];
5436 const int compmode_cost =
5437 is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
5438 const int real_compmode_cost =
5439 cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
5440 ? compmode_cost
5441 : 0;
5442 // Point to variables that are maintained between loop iterations
5443 args.single_newmv = search_state.single_newmv;
5444 args.single_newmv_rate = search_state.single_newmv_rate;
5445 args.single_newmv_valid = search_state.single_newmv_valid;
5446 args.single_comp_cost = real_compmode_cost;
5447 args.ref_frame_cost = ref_frame_cost;
5448
5449 int64_t skip_rd[2] = { search_state.best_skip_rd[0],
5450 search_state.best_skip_rd[1] };
5451 int64_t this_yrd = INT64_MAX;
5452 #if CONFIG_COLLECT_COMPONENT_TIMING
5453 start_timing(cpi, handle_inter_mode_time);
5454 #endif
5455 int64_t this_rd = handle_inter_mode(
5456 cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
5457 ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
5458 inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
5459 &this_yrd);
5460 #if CONFIG_COLLECT_COMPONENT_TIMING
5461 end_timing(cpi, handle_inter_mode_time);
5462 #endif
5463 if (sf->inter_sf.prune_comp_search_by_single_result > 0 &&
5464 is_inter_singleref_mode(this_mode)) {
5465 collect_single_states(x, &search_state, mbmi);
5466 }
5467
5468 if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
5469 is_inter_singleref_mode(this_mode))
5470 update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);
5471
5472 if (this_rd == INT64_MAX) continue;
5473
5474 if (mbmi->skip_txfm) {
5475 rd_stats_y.rate = 0;
5476 rd_stats_uv.rate = 0;
5477 }
5478
5479 if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
5480 this_rd < ref_frame_rd[ref_frame]) {
5481 ref_frame_rd[ref_frame] = this_rd;
5482 }
5483
5484 // Did this mode help, i.e., is it the new best mode
5485 if (this_rd < search_state.best_rd) {
5486 assert(IMPLIES(comp_pred,
5487 cm->current_frame.reference_mode != SINGLE_REFERENCE));
5488 search_state.best_pred_sse = x->pred_sse[ref_frame];
5489 best_inter_yrd = this_yrd;
5490 update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
5491 &rd_stats_uv, mode_enum, x, do_tx_search);
5492 if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
5493 search_state.best_skip_rd[1] = skip_rd[1];
5494 }
5495 if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
5496 // Add this mode to motion mode candidate list for motion mode search
5497 // if using motion_mode_for_winner_cand speed feature
5498 handle_winner_cand(mbmi, &best_motion_mode_cands,
5499 max_winner_motion_mode_cand, this_rd,
5500 &motion_mode_cand, args.skip_motion_mode);
5501 }
5502
5503 /* keep record of best compound/single-only prediction */
5504 record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
5505 x->rdmult, &search_state, compmode_cost);
5506 }
5507
5508 #if CONFIG_COLLECT_COMPONENT_TIMING
5509 start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
5510 #endif
5511 if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
5512 // For the single ref winner candidates, evaluate other motion modes (non
5513 // simple translation).
5514 evaluate_motion_mode_for_winner_candidates(
5515 cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
5516 &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
5517 &search_state, &best_inter_yrd);
5518 }
5519 #if CONFIG_COLLECT_COMPONENT_TIMING
5520 end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
5521 #endif
5522
5523 #if CONFIG_COLLECT_COMPONENT_TIMING
5524 start_timing(cpi, do_tx_search_time);
5525 #endif
5526 if (do_tx_search != 1) {
5527 // A full tx search has not yet been done, do tx search for
5528 // top mode candidates
5529 tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
5530 yv12_mb, mi_row, mi_col, &search_state,
5531 rd_cost, ctx, &best_inter_yrd);
5532 }
5533 #if CONFIG_COLLECT_COMPONENT_TIMING
5534 end_timing(cpi, do_tx_search_time);
5535 #endif
5536
5537 #if CONFIG_COLLECT_COMPONENT_TIMING
5538 start_timing(cpi, handle_intra_mode_time);
5539 #endif
5540 // Gate intra mode evaluation if best of inter is skip except when source
5541 // variance is extremely low
5542 const unsigned int src_var_thresh_intra_skip = 1;
5543 if (sf->intra_sf.skip_intra_in_interframe &&
5544 (x->source_variance > src_var_thresh_intra_skip)) {
5545 if (inter_cost >= 0 && intra_cost >= 0) {
5546 aom_clear_system_state();
5547 const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
5548 ? &av1_intrap_nn_config
5549 : &av1_intrap_hd_nn_config;
5550 float nn_features[6];
5551 float scores[2] = { 0.0f };
5552 float probs[2] = { 0.0f };
5553
5554 nn_features[0] = (float)search_state.best_mbmode.skip_txfm;
5555 nn_features[1] = (float)mi_size_wide_log2[bsize];
5556 nn_features[2] = (float)mi_size_high_log2[bsize];
5557 nn_features[3] = (float)intra_cost;
5558 nn_features[4] = (float)inter_cost;
5559 const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
5560 const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
5561 nn_features[5] = (float)(ac_q_max / ac_q);
5562
5563 av1_nn_predict(nn_features, nn_config, 1, scores);
5564 aom_clear_system_state();
5565 av1_nn_softmax(scores, probs, 2);
5566
5567 if (probs[1] > 0.8) search_state.intra_search_state.skip_intra_modes = 1;
5568 } else if ((search_state.best_mbmode.skip_txfm) &&
5569 (sf->intra_sf.skip_intra_in_interframe >= 2)) {
5570 search_state.intra_search_state.skip_intra_modes = 1;
5571 }
5572 }
5573
5574 const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
5575 search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
5576 &sf_args, intra_ref_frame_cost,
5577 best_inter_yrd);
5578 #if CONFIG_COLLECT_COMPONENT_TIMING
5579 end_timing(cpi, handle_intra_mode_time);
5580 #endif
5581
5582 int winner_mode_count =
5583 cpi->sf.winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
5584 // In effect only when fast tx search speed features are enabled.
5585 refine_winner_mode_tx(
5586 cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
5587 &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
5588 search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
5589
5590 // Initialize default mode evaluation params
5591 set_mode_eval_params(cpi, x, DEFAULT_EVAL);
5592
5593 // Only try palette mode when the best mode so far is an intra mode.
5594 const int try_palette =
5595 cpi->oxcf.tool_cfg.enable_palette &&
5596 av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
5597 !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
5598 RD_STATS this_rd_cost;
5599 int this_skippable = 0;
5600 if (try_palette) {
5601 #if CONFIG_COLLECT_COMPONENT_TIMING
5602 start_timing(cpi, av1_search_palette_mode_time);
5603 #endif
5604 this_skippable = av1_search_palette_mode(
5605 &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
5606 ctx, &this_rd_cost, search_state.best_rd);
5607 #if CONFIG_COLLECT_COMPONENT_TIMING
5608 end_timing(cpi, av1_search_palette_mode_time);
5609 #endif
5610 if (this_rd_cost.rdcost < search_state.best_rd) {
5611 search_state.best_mode_index = THR_DC;
5612 mbmi->mv[0].as_int = 0;
5613 rd_cost->rate = this_rd_cost.rate;
5614 rd_cost->dist = this_rd_cost.dist;
5615 rd_cost->rdcost = this_rd_cost.rdcost;
5616 search_state.best_rd = rd_cost->rdcost;
5617 search_state.best_mbmode = *mbmi;
5618 search_state.best_skip2 = 0;
5619 search_state.best_mode_skippable = this_skippable;
5620 memcpy(ctx->blk_skip, txfm_info->blk_skip,
5621 sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
5622 av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
5623 }
5624 }
5625
5626 search_state.best_mbmode.skip_mode = 0;
5627 if (cm->current_frame.skip_mode_info.skip_mode_flag &&
5628 is_comp_ref_allowed(bsize)) {
5629 const struct segmentation *const seg = &cm->seg;
5630 unsigned char segment_id = mbmi->segment_id;
5631 if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
5632 rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
5633 }
5634 }
5635
5636 // Make sure that the ref_mv_idx is only nonzero when we're
5637 // using a mode which can support ref_mv_idx
5638 if (search_state.best_mbmode.ref_mv_idx != 0 &&
5639 !(search_state.best_mbmode.mode == NEWMV ||
5640 search_state.best_mbmode.mode == NEW_NEWMV ||
5641 have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
5642 search_state.best_mbmode.ref_mv_idx = 0;
5643 }
5644
5645 if (search_state.best_mode_index == THR_INVALID ||
5646 search_state.best_rd >= best_rd_so_far) {
5647 rd_cost->rate = INT_MAX;
5648 rd_cost->rdcost = INT64_MAX;
5649 return;
5650 }
5651
5652 const InterpFilter interp_filter = features->interp_filter;
5653 assert((interp_filter == SWITCHABLE) ||
5654 (interp_filter ==
5655 search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
5656 !is_inter_block(&search_state.best_mbmode));
5657 assert((interp_filter == SWITCHABLE) ||
5658 (interp_filter ==
5659 search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
5660 !is_inter_block(&search_state.best_mbmode));
5661
5662 if (!cpi->rc.is_src_frame_alt_ref && cpi->sf.inter_sf.adaptive_rd_thresh) {
5663 av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
5664 sf->inter_sf.adaptive_rd_thresh, bsize,
5665 search_state.best_mode_index);
5666 }
5667
5668 // macroblock modes
5669 *mbmi = search_state.best_mbmode;
5670 txfm_info->skip_txfm |= search_state.best_skip2;
5671
5672 // Note: this section is needed since the mode may have been forced to
5673 // GLOBALMV by the all-zero mode handling of ref-mv.
5674 if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
5675 // Correct the interp filters for GLOBALMV
5676 if (is_nontrans_global_motion(xd, xd->mi[0])) {
5677 int_interpfilters filters =
5678 av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
5679 assert(mbmi->interp_filters.as_int == filters.as_int);
5680 (void)filters;
5681 }
5682 }
5683
5684 for (i = 0; i < REFERENCE_MODES; ++i) {
5685 if (search_state.best_pred_rd[i] == INT64_MAX) {
5686 search_state.best_pred_diff[i] = INT_MIN;
5687 } else {
5688 search_state.best_pred_diff[i] =
5689 search_state.best_rd - search_state.best_pred_rd[i];
5690 }
5691 }
5692
5693 txfm_info->skip_txfm |= search_state.best_mode_skippable;
5694
5695 assert(search_state.best_mode_index != THR_INVALID);
5696
5697 #if CONFIG_INTERNAL_STATS
5698 store_coding_context(x, ctx, search_state.best_mode_index,
5699 search_state.best_pred_diff,
5700 search_state.best_mode_skippable);
5701 #else
5702 store_coding_context(x, ctx, search_state.best_pred_diff,
5703 search_state.best_mode_skippable);
5704 #endif // CONFIG_INTERNAL_STATS
5705
5706 if (mbmi->palette_mode_info.palette_size[1] > 0) {
5707 assert(try_palette);
5708 av1_restore_uv_color_map(cpi, x);
5709 }
5710 }
5711
// RD mode selection for a block whose segment has the SEG_LVL_SKIP feature
// active: no residual is coded, so only a single GLOBALMV candidate is
// evaluated (no mode loop). The function fills 'rd_cost' with the rate/
// distortion of that candidate, or rate=INT_MAX/rdcost=INT64_MAX if it does
// not beat 'best_rd_so_far'.
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
                                        TileDataEnc *tile_data, MACROBLOCK *x,
                                        int mi_row, int mi_col,
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = 0;  // Only single prediction is considered here.
  int i;
  int64_t best_pred_diff[REFERENCE_MODES];
  unsigned int ref_costs_single[REF_FRAMES];
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
  const ModeCosts *mode_costs = &x->mode_costs;
  const int *comp_inter_cost =
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
  InterpFilter best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  // Distortion is 0 by construction: the block is coded as skip.
  const int64_t distortion2 = 0;
  (void)mi_row;
  (void)mi_col;
  (void)tile_data;

  av1_collect_neighbors_ref_counts(xd);

  // Signaling cost of each reference frame given the neighbor context.
  estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  // Force the only allowed mode combination: GLOBALMV with simple
  // translation, DC chroma, no palette and no filter-intra.
  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->mode = GLOBALMV;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->uv_mode = UV_DC_PRED;
  // Reference frame may be dictated by the segment; otherwise use LAST_FRAME.
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  else
    mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  // Motion vector comes from the global motion model of the chosen reference.
  mbmi->mv[0].as_int =
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
                           features->allow_high_precision_mv, bsize, mi_col,
                           mi_row, features->cur_frame_force_integer_mv)
          .as_int;
  mbmi->tx_size = max_txsize_lookup[bsize];
  x->txfm_search_info.skip_txfm = 1;

  mbmi->ref_mv_idx = 0;

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  av1_count_overlappable_neighbors(cm, xd);
  // Collect warp samples even though motion_mode stays SIMPLE_TRANSLATION;
  // num_proj_ref is part of the coded block state.
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
    // Select the samples according to motion vector difference
    if (mbmi->num_proj_ref > 1) {
      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
                                             mbmi->num_proj_ref, bsize);
    }
  }

  const InterpFilter interp_filter = features->interp_filter;
  set_default_interp_filters(mbmi, interp_filter);

  if (interp_filter != SWITCHABLE) {
    best_filter = interp_filter;
  } else {
    best_filter = EIGHTTAP_REGULAR;
    // With switchable filters, pick the one with the lowest signaling rate
    // (distortion is zero, so rate alone decides).
    if (av1_is_interp_needed(xd)) {
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
        rs = av1_get_switchable_rate(x, xd, interp_filter,
                                     cm->seq_params.enable_dual_filter);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = mbmi->interp_filters.as_filters.y_filter;
        }
      }
    }
  }
  // Set the appropriate filter
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
  rate2 += av1_get_switchable_rate(x, xd, interp_filter,
                                   cm->seq_params.enable_dual_filter);

  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
    rate2 += comp_inter_cost[comp_pred];

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  // Signal failure to the caller if the skip candidate does not improve on
  // the best RD found so far.
  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((interp_filter == SWITCHABLE) ||
         (interp_filter == mbmi->interp_filters.as_filters.y_filter));

  if (cpi->sf.inter_sf.adaptive_rd_thresh) {
    av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
                              cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
                              THR_GLOBALMV);
  }

  av1_zero(best_pred_diff);

#if CONFIG_INTERNAL_STATS
  store_coding_context(x, ctx, THR_GLOBALMV, best_pred_diff, 0);
#else
  store_coding_context(x, ctx, best_pred_diff, 0);
#endif  // CONFIG_INTERNAL_STATS
}
5845
/*!\cond */
// Context passed through foreach_overlappable_nb_{above,left}() to the
// calc_target_weighted_pred_{above,left} callbacks below.
struct calc_target_weighted_pred_ctxt {
  // Provides the 'wsrc' and 'mask' accumulation buffers being filled.
  const OBMCBuffer *obmc_buffer;
  // Neighbour prediction (above row or left column) to blend in.
  const uint8_t *tmp;
  int tmp_stride;
  // Number of pixels over which the neighbour prediction overlaps the block.
  int overlap;
};
/*!\endcond */
5854
calc_target_weighted_pred_above(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * nb_mi,void * fun_ctxt,const int num_planes)5855 static INLINE void calc_target_weighted_pred_above(
5856 MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
5857 int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
5858 (void)nb_mi;
5859 (void)num_planes;
5860 (void)rel_mi_row;
5861 (void)dir;
5862
5863 struct calc_target_weighted_pred_ctxt *ctxt =
5864 (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
5865
5866 const int bw = xd->width << MI_SIZE_LOG2;
5867 const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
5868
5869 int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
5870 int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
5871 const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
5872 const int is_hbd = is_cur_buf_hbd(xd);
5873
5874 if (!is_hbd) {
5875 for (int row = 0; row < ctxt->overlap; ++row) {
5876 const uint8_t m0 = mask1d[row];
5877 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
5878 for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
5879 wsrc[col] = m1 * tmp[col];
5880 mask[col] = m0;
5881 }
5882 wsrc += bw;
5883 mask += bw;
5884 tmp += ctxt->tmp_stride;
5885 }
5886 } else {
5887 const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
5888
5889 for (int row = 0; row < ctxt->overlap; ++row) {
5890 const uint8_t m0 = mask1d[row];
5891 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
5892 for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
5893 wsrc[col] = m1 * tmp16[col];
5894 mask[col] = m0;
5895 }
5896 wsrc += bw;
5897 mask += bw;
5898 tmp16 += ctxt->tmp_stride;
5899 }
5900 }
5901 }
5902
calc_target_weighted_pred_left(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * nb_mi,void * fun_ctxt,const int num_planes)5903 static INLINE void calc_target_weighted_pred_left(
5904 MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
5905 int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
5906 (void)nb_mi;
5907 (void)num_planes;
5908 (void)rel_mi_col;
5909 (void)dir;
5910
5911 struct calc_target_weighted_pred_ctxt *ctxt =
5912 (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
5913
5914 const int bw = xd->width << MI_SIZE_LOG2;
5915 const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
5916
5917 int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
5918 int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
5919 const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
5920 const int is_hbd = is_cur_buf_hbd(xd);
5921
5922 if (!is_hbd) {
5923 for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
5924 for (int col = 0; col < ctxt->overlap; ++col) {
5925 const uint8_t m0 = mask1d[col];
5926 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
5927 wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
5928 (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
5929 mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
5930 }
5931 wsrc += bw;
5932 mask += bw;
5933 tmp += ctxt->tmp_stride;
5934 }
5935 } else {
5936 const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
5937
5938 for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
5939 for (int col = 0; col < ctxt->overlap; ++col) {
5940 const uint8_t m0 = mask1d[col];
5941 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
5942 wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
5943 (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
5944 mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
5945 }
5946 wsrc += bw;
5947 mask += bw;
5948 tmp16 += ctxt->tmp_stride;
5949 }
5950 }
5951 }
5952
5953 // This function has a structure similar to av1_build_obmc_inter_prediction
5954 //
5955 // The OBMC predictor is computed as:
5956 //
5957 // PObmc(x,y) =
5958 // AOM_BLEND_A64(Mh(x),
5959 // AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
5960 // PLeft(x, y))
5961 //
5962 // Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
5963 // rounding, this can be written as:
5964 //
5965 // AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
5966 // Mh(x) * Mv(y) * P(x,y) +
5967 // Mh(x) * Cv(y) * Pabove(x,y) +
5968 // AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
5969 //
5970 // Where :
5971 //
5972 // Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
//  Ch(x) = AOM_BLEND_A64_MAX_ALPHA - Mh(x)
5974 //
5975 // This function computes 'wsrc' and 'mask' as:
5976 //
5977 // wsrc(x, y) =
5978 // AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
5979 // Mh(x) * Cv(y) * Pabove(x,y) +
5980 // AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
5981 //
5982 // mask(x, y) = Mh(x) * Mv(y)
5983 //
5984 // These can then be used to efficiently approximate the error for any
5985 // predictor P in the context of the provided neighbouring predictors by
5986 // computing:
5987 //
5988 // error(x, y) =
5989 // wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
5990 //
calc_target_weighted_pred(const AV1_COMMON * cm,const MACROBLOCK * x,const MACROBLOCKD * xd,const uint8_t * above,int above_stride,const uint8_t * left,int left_stride)5991 static AOM_INLINE void calc_target_weighted_pred(
5992 const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
5993 const uint8_t *above, int above_stride, const uint8_t *left,
5994 int left_stride) {
5995 const BLOCK_SIZE bsize = xd->mi[0]->bsize;
5996 const int bw = xd->width << MI_SIZE_LOG2;
5997 const int bh = xd->height << MI_SIZE_LOG2;
5998 const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
5999 int32_t *mask_buf = obmc_buffer->mask;
6000 int32_t *wsrc_buf = obmc_buffer->wsrc;
6001
6002 const int is_hbd = is_cur_buf_hbd(xd);
6003 const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
6004
6005 // plane 0 should not be sub-sampled
6006 assert(xd->plane[0].subsampling_x == 0);
6007 assert(xd->plane[0].subsampling_y == 0);
6008
6009 av1_zero_array(wsrc_buf, bw * bh);
6010 for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
6011
6012 // handle above row
6013 if (xd->up_available) {
6014 const int overlap =
6015 AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
6016 struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
6017 above_stride, overlap };
6018 foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
6019 max_neighbor_obmc[mi_size_wide_log2[bsize]],
6020 calc_target_weighted_pred_above, &ctxt);
6021 }
6022
6023 for (int i = 0; i < bw * bh; ++i) {
6024 wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6025 mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6026 }
6027
6028 // handle left column
6029 if (xd->left_available) {
6030 const int overlap =
6031 AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
6032 struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
6033 left_stride, overlap };
6034 foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
6035 max_neighbor_obmc[mi_size_high_log2[bsize]],
6036 calc_target_weighted_pred_left, &ctxt);
6037 }
6038
6039 if (!is_hbd) {
6040 const uint8_t *src = x->plane[0].src.buf;
6041
6042 for (int row = 0; row < bh; ++row) {
6043 for (int col = 0; col < bw; ++col) {
6044 wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6045 }
6046 wsrc_buf += bw;
6047 src += x->plane[0].src.stride;
6048 }
6049 } else {
6050 const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
6051
6052 for (int row = 0; row < bh; ++row) {
6053 for (int col = 0; col < bw; ++col) {
6054 wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6055 }
6056 wsrc_buf += bw;
6057 src += x->plane[0].src.stride;
6058 }
6059 }
6060 }
6061
/* Use standard 3x3 Sobel matrix. Macro so it can be used for either high or
   low bit-depth arrays. */
/* Horizontal gradient (responds to vertical edges) at pixel (i, j). */
#define SOBEL_X(src, stride, i, j)                       \
  ((src)[((i)-1) + (stride) * ((j)-1)] -                 \
   (src)[((i) + 1) + (stride) * ((j)-1)] + /* NOLINT */  \
   2 * (src)[((i)-1) + (stride) * (j)] -   /* NOLINT */  \
   2 * (src)[((i) + 1) + (stride) * (j)] + /* NOLINT */  \
   (src)[((i)-1) + (stride) * ((j) + 1)] - /* NOLINT */  \
   (src)[((i) + 1) + (stride) * ((j) + 1)]) /* NOLINT */
/* Vertical gradient (responds to horizontal edges) at pixel (i, j). */
#define SOBEL_Y(src, stride, i, j)                       \
  ((src)[((i)-1) + (stride) * ((j)-1)] +                 \
   2 * (src)[(i) + (stride) * ((j)-1)] +   /* NOLINT */  \
   (src)[((i) + 1) + (stride) * ((j)-1)] - /* NOLINT */  \
   (src)[((i)-1) + (stride) * ((j) + 1)] - /* NOLINT */  \
   2 * (src)[(i) + (stride) * ((j) + 1)] - /* NOLINT */  \
   (src)[((i) + 1) + (stride) * ((j) + 1)]) /* NOLINT */
6078
av1_sobel(const uint8_t * input,int stride,int i,int j,bool high_bd)6079 sobel_xy av1_sobel(const uint8_t *input, int stride, int i, int j,
6080 bool high_bd) {
6081 int16_t s_x;
6082 int16_t s_y;
6083 if (high_bd) {
6084 const uint16_t *src = CONVERT_TO_SHORTPTR(input);
6085 s_x = SOBEL_X(src, stride, i, j);
6086 s_y = SOBEL_Y(src, stride, i, j);
6087 } else {
6088 s_x = SOBEL_X(input, stride, i, j);
6089 s_y = SOBEL_Y(input, stride, i, j);
6090 }
6091 sobel_xy r = { .x = s_x, .y = s_y };
6092 return r;
6093 }
6094
// 8-tap Gaussian convolution filter with sigma = 1.3. The taps sum to 128
// and every coefficient must be even (a requirement of the convolution
// kernels this is fed to).
DECLARE_ALIGNED(16, static const int16_t, gauss_filter[8]) = { 2, 12, 30, 40,
                                                               30, 12, 2, 0 };
6099
av1_gaussian_blur(const uint8_t * src,int src_stride,int w,int h,uint8_t * dst,bool high_bd,int bd)6100 void av1_gaussian_blur(const uint8_t *src, int src_stride, int w, int h,
6101 uint8_t *dst, bool high_bd, int bd) {
6102 ConvolveParams conv_params = get_conv_params(0, 0, bd);
6103 InterpFilterParams filter = { .filter_ptr = gauss_filter,
6104 .taps = 8,
6105 .interp_filter = EIGHTTAP_REGULAR };
6106 // Requirements from the vector-optimized implementations.
6107 assert(h % 4 == 0);
6108 assert(w % 8 == 0);
6109 // Because we use an eight tap filter, the stride should be at least 7 + w.
6110 assert(src_stride >= w + 7);
6111 #if CONFIG_AV1_HIGHBITDEPTH
6112 if (high_bd) {
6113 av1_highbd_convolve_2d_sr(CONVERT_TO_SHORTPTR(src), src_stride,
6114 CONVERT_TO_SHORTPTR(dst), w, w, h, &filter,
6115 &filter, 0, 0, &conv_params, bd);
6116 } else {
6117 av1_convolve_2d_sr(src, src_stride, dst, w, w, h, &filter, &filter, 0, 0,
6118 &conv_params);
6119 }
6120 #else
6121 (void)high_bd;
6122 av1_convolve_2d_sr(src, src_stride, dst, w, w, h, &filter, &filter, 0, 0,
6123 &conv_params);
6124 #endif
6125 }
6126
edge_probability(const uint8_t * input,int w,int h,bool high_bd,int bd)6127 static EdgeInfo edge_probability(const uint8_t *input, int w, int h,
6128 bool high_bd, int bd) {
6129 // The probability of an edge in the whole image is the same as the highest
6130 // probability of an edge for any individual pixel. Use Sobel as the metric
6131 // for finding an edge.
6132 uint16_t highest = 0;
6133 uint16_t highest_x = 0;
6134 uint16_t highest_y = 0;
6135 // Ignore the 1 pixel border around the image for the computation.
6136 for (int j = 1; j < h - 1; ++j) {
6137 for (int i = 1; i < w - 1; ++i) {
6138 sobel_xy g = av1_sobel(input, w, i, j, high_bd);
6139 // Scale down to 8-bit to get same output regardless of bit depth.
6140 int16_t g_x = g.x >> (bd - 8);
6141 int16_t g_y = g.y >> (bd - 8);
6142 uint16_t magnitude = (uint16_t)sqrt(g_x * g_x + g_y * g_y);
6143 highest = AOMMAX(highest, magnitude);
6144 highest_x = AOMMAX(highest_x, g_x);
6145 highest_y = AOMMAX(highest_y, g_y);
6146 }
6147 }
6148 EdgeInfo ei = { .magnitude = highest, .x = highest_x, .y = highest_y };
6149 return ei;
6150 }
6151
6152 /* Uses most of the Canny edge detection algorithm to find if there are any
6153 * edges in the image.
6154 */
av1_edge_exists(const uint8_t * src,int src_stride,int w,int h,bool high_bd,int bd)6155 EdgeInfo av1_edge_exists(const uint8_t *src, int src_stride, int w, int h,
6156 bool high_bd, int bd) {
6157 if (w < 3 || h < 3) {
6158 EdgeInfo n = { .magnitude = 0, .x = 0, .y = 0 };
6159 return n;
6160 }
6161 uint8_t *blurred;
6162 if (high_bd) {
6163 blurred = CONVERT_TO_BYTEPTR(aom_memalign(32, sizeof(uint16_t) * w * h));
6164 } else {
6165 blurred = (uint8_t *)aom_memalign(32, sizeof(uint8_t) * w * h);
6166 }
6167 av1_gaussian_blur(src, src_stride, w, h, blurred, high_bd, bd);
6168 // Skip the non-maximum suppression step in Canny edge detection. We just
6169 // want a probability of an edge existing in the buffer, which is determined
6170 // by the strongest edge in it -- we don't need to eliminate the weaker
6171 // edges. Use Sobel for the edge detection.
6172 EdgeInfo prob = edge_probability(blurred, w, h, high_bd, bd);
6173 if (high_bd) {
6174 aom_free(CONVERT_TO_SHORTPTR(blurred));
6175 } else {
6176 aom_free(blurred);
6177 }
6178 return prob;
6179 }
6180