1 /*
2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <assert.h>
13 #include <math.h>
14 #include <stdbool.h>
15
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/av1_rtcd.h"
19
20 #include "aom_dsp/aom_dsp_common.h"
21 #include "aom_dsp/blend.h"
22 #include "aom_mem/aom_mem.h"
23 #include "aom_ports/aom_timer.h"
24 #include "aom_ports/mem.h"
25
26 #include "av1/common/av1_common_int.h"
27 #include "av1/common/cfl.h"
28 #include "av1/common/blockd.h"
29 #include "av1/common/common.h"
30 #include "av1/common/common_data.h"
31 #include "av1/common/entropy.h"
32 #include "av1/common/entropymode.h"
33 #include "av1/common/idct.h"
34 #include "av1/common/mvref_common.h"
35 #include "av1/common/obmc.h"
36 #include "av1/common/pred_common.h"
37 #include "av1/common/quant_common.h"
38 #include "av1/common/reconinter.h"
39 #include "av1/common/reconintra.h"
40 #include "av1/common/scan.h"
41 #include "av1/common/seg_common.h"
42 #include "av1/common/txb_common.h"
43 #include "av1/common/warped_motion.h"
44
45 #include "av1/encoder/aq_variance.h"
46 #include "av1/encoder/av1_quantize.h"
47 #include "av1/encoder/cost.h"
48 #include "av1/encoder/compound_type.h"
49 #include "av1/encoder/encodemb.h"
50 #include "av1/encoder/encodemv.h"
51 #include "av1/encoder/encoder.h"
52 #include "av1/encoder/encodetxb.h"
53 #include "av1/encoder/hybrid_fwd_txfm.h"
54 #include "av1/encoder/interp_search.h"
55 #include "av1/encoder/intra_mode_search.h"
56 #include "av1/encoder/intra_mode_search_utils.h"
57 #include "av1/encoder/mcomp.h"
58 #include "av1/encoder/ml.h"
59 #include "av1/encoder/mode_prune_model_weights.h"
60 #include "av1/encoder/model_rd.h"
61 #include "av1/encoder/motion_search_facade.h"
62 #include "av1/encoder/palette.h"
63 #include "av1/encoder/pustats.h"
64 #include "av1/encoder/random.h"
65 #include "av1/encoder/ratectrl.h"
66 #include "av1/encoder/rd.h"
67 #include "av1/encoder/rdopt.h"
68 #include "av1/encoder/reconinter_enc.h"
69 #include "av1/encoder/tokenize.h"
70 #include "av1/encoder/tpl_model.h"
71 #include "av1/encoder/tx_search.h"
72
73 #define LAST_NEW_MV_INDEX 6
74
75 // Mode_threshold multiplication factor table for prune_inter_modes_if_skippable
76 // The values are kept in Q12 format and equation used to derive is
77 // (2.5 - ((float)x->qindex / MAXQ) * 1.5)
78 #define MODE_THRESH_QBITS 12
// Multiplier (Q12 fixed point) applied to mode thresholds, one entry per
// qindex.  Values follow (2.5 - 1.5 * qindex / MAXQ) * (1 << MODE_THRESH_QBITS)
// per the derivation comment above: 10240 (= 2.5 in Q12) at qindex 0,
// decreasing linearly to 4096 (= 1.0 in Q12) at the maximum qindex.
static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
  10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
  9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
  9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
  9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
  9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
  8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
  8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
  8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
  8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
  7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
  7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
  7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
  7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
  6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
  6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
  6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
  5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
  5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
  5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
  5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
  4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
  4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
  4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
  4144,  4120,  4096
};
105
// Default evaluation order of the MAX_MODES prediction modes during the inter
// mode RD search; earlier entries are tried first.  Grouped as: single-ref
// modes by mode type, compound NEAREST_NEAREST pairs, the remaining compound
// mode combinations per reference pair, and finally the intra modes.
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
  // Single reference: NEARESTMV for each reference frame.
  THR_NEARESTMV,
  THR_NEARESTL2,
  THR_NEARESTL3,
  THR_NEARESTB,
  THR_NEARESTA2,
  THR_NEARESTA,
  THR_NEARESTG,

  // Single reference: NEWMV for each reference frame.
  THR_NEWMV,
  THR_NEWL2,
  THR_NEWL3,
  THR_NEWB,
  THR_NEWA2,
  THR_NEWA,
  THR_NEWG,

  // Single reference: NEARMV for each reference frame.
  THR_NEARMV,
  THR_NEARL2,
  THR_NEARL3,
  THR_NEARB,
  THR_NEARA2,
  THR_NEARA,
  THR_NEARG,

  // Single reference: GLOBALMV for each reference frame.
  THR_GLOBALMV,
  THR_GLOBALL2,
  THR_GLOBALL3,
  THR_GLOBALB,
  THR_GLOBALA2,
  THR_GLOBALA,
  THR_GLOBALG,

  // Compound NEAREST_NEARESTMV for every supported reference-frame pair.
  THR_COMP_NEAREST_NEARESTLA,
  THR_COMP_NEAREST_NEARESTL2A,
  THR_COMP_NEAREST_NEARESTL3A,
  THR_COMP_NEAREST_NEARESTGA,
  THR_COMP_NEAREST_NEARESTLB,
  THR_COMP_NEAREST_NEARESTL2B,
  THR_COMP_NEAREST_NEARESTL3B,
  THR_COMP_NEAREST_NEARESTGB,
  THR_COMP_NEAREST_NEARESTLA2,
  THR_COMP_NEAREST_NEARESTL2A2,
  THR_COMP_NEAREST_NEARESTL3A2,
  THR_COMP_NEAREST_NEARESTGA2,
  THR_COMP_NEAREST_NEARESTLL2,
  THR_COMP_NEAREST_NEARESTLL3,
  THR_COMP_NEAREST_NEARESTLG,
  THR_COMP_NEAREST_NEARESTBA,

  // Remaining compound modes, one seven-entry group per reference pair
  // (NEAR_NEAR, NEW_NEW, NEW_NEAREST, NEAREST_NEW, NEW_NEAR, NEAR_NEW,
  // GLOBAL_GLOBAL).
  THR_COMP_NEAR_NEARLB,
  THR_COMP_NEW_NEWLB,
  THR_COMP_NEW_NEARESTLB,
  THR_COMP_NEAREST_NEWLB,
  THR_COMP_NEW_NEARLB,
  THR_COMP_NEAR_NEWLB,
  THR_COMP_GLOBAL_GLOBALLB,

  THR_COMP_NEAR_NEARLA,
  THR_COMP_NEW_NEWLA,
  THR_COMP_NEW_NEARESTLA,
  THR_COMP_NEAREST_NEWLA,
  THR_COMP_NEW_NEARLA,
  THR_COMP_NEAR_NEWLA,
  THR_COMP_GLOBAL_GLOBALLA,

  THR_COMP_NEAR_NEARL2A,
  THR_COMP_NEW_NEWL2A,
  THR_COMP_NEW_NEARESTL2A,
  THR_COMP_NEAREST_NEWL2A,
  THR_COMP_NEW_NEARL2A,
  THR_COMP_NEAR_NEWL2A,
  THR_COMP_GLOBAL_GLOBALL2A,

  THR_COMP_NEAR_NEARL3A,
  THR_COMP_NEW_NEWL3A,
  THR_COMP_NEW_NEARESTL3A,
  THR_COMP_NEAREST_NEWL3A,
  THR_COMP_NEW_NEARL3A,
  THR_COMP_NEAR_NEWL3A,
  THR_COMP_GLOBAL_GLOBALL3A,

  THR_COMP_NEAR_NEARGA,
  THR_COMP_NEW_NEWGA,
  THR_COMP_NEW_NEARESTGA,
  THR_COMP_NEAREST_NEWGA,
  THR_COMP_NEW_NEARGA,
  THR_COMP_NEAR_NEWGA,
  THR_COMP_GLOBAL_GLOBALGA,

  THR_COMP_NEAR_NEARL2B,
  THR_COMP_NEW_NEWL2B,
  THR_COMP_NEW_NEARESTL2B,
  THR_COMP_NEAREST_NEWL2B,
  THR_COMP_NEW_NEARL2B,
  THR_COMP_NEAR_NEWL2B,
  THR_COMP_GLOBAL_GLOBALL2B,

  THR_COMP_NEAR_NEARL3B,
  THR_COMP_NEW_NEWL3B,
  THR_COMP_NEW_NEARESTL3B,
  THR_COMP_NEAREST_NEWL3B,
  THR_COMP_NEW_NEARL3B,
  THR_COMP_NEAR_NEWL3B,
  THR_COMP_GLOBAL_GLOBALL3B,

  THR_COMP_NEAR_NEARGB,
  THR_COMP_NEW_NEWGB,
  THR_COMP_NEW_NEARESTGB,
  THR_COMP_NEAREST_NEWGB,
  THR_COMP_NEW_NEARGB,
  THR_COMP_NEAR_NEWGB,
  THR_COMP_GLOBAL_GLOBALGB,

  THR_COMP_NEAR_NEARLA2,
  THR_COMP_NEW_NEWLA2,
  THR_COMP_NEW_NEARESTLA2,
  THR_COMP_NEAREST_NEWLA2,
  THR_COMP_NEW_NEARLA2,
  THR_COMP_NEAR_NEWLA2,
  THR_COMP_GLOBAL_GLOBALLA2,

  THR_COMP_NEAR_NEARL2A2,
  THR_COMP_NEW_NEWL2A2,
  THR_COMP_NEW_NEARESTL2A2,
  THR_COMP_NEAREST_NEWL2A2,
  THR_COMP_NEW_NEARL2A2,
  THR_COMP_NEAR_NEWL2A2,
  THR_COMP_GLOBAL_GLOBALL2A2,

  THR_COMP_NEAR_NEARL3A2,
  THR_COMP_NEW_NEWL3A2,
  THR_COMP_NEW_NEARESTL3A2,
  THR_COMP_NEAREST_NEWL3A2,
  THR_COMP_NEW_NEARL3A2,
  THR_COMP_NEAR_NEWL3A2,
  THR_COMP_GLOBAL_GLOBALL3A2,

  THR_COMP_NEAR_NEARGA2,
  THR_COMP_NEW_NEWGA2,
  THR_COMP_NEW_NEARESTGA2,
  THR_COMP_NEAREST_NEWGA2,
  THR_COMP_NEW_NEARGA2,
  THR_COMP_NEAR_NEWGA2,
  THR_COMP_GLOBAL_GLOBALGA2,

  THR_COMP_NEAR_NEARLL2,
  THR_COMP_NEW_NEWLL2,
  THR_COMP_NEW_NEARESTLL2,
  THR_COMP_NEAREST_NEWLL2,
  THR_COMP_NEW_NEARLL2,
  THR_COMP_NEAR_NEWLL2,
  THR_COMP_GLOBAL_GLOBALLL2,

  THR_COMP_NEAR_NEARLL3,
  THR_COMP_NEW_NEWLL3,
  THR_COMP_NEW_NEARESTLL3,
  THR_COMP_NEAREST_NEWLL3,
  THR_COMP_NEW_NEARLL3,
  THR_COMP_NEAR_NEWLL3,
  THR_COMP_GLOBAL_GLOBALLL3,

  THR_COMP_NEAR_NEARLG,
  THR_COMP_NEW_NEWLG,
  THR_COMP_NEW_NEARESTLG,
  THR_COMP_NEAREST_NEWLG,
  THR_COMP_NEW_NEARLG,
  THR_COMP_NEAR_NEWLG,
  THR_COMP_GLOBAL_GLOBALLG,

  THR_COMP_NEAR_NEARBA,
  THR_COMP_NEW_NEWBA,
  THR_COMP_NEW_NEARESTBA,
  THR_COMP_NEAREST_NEWBA,
  THR_COMP_NEW_NEARBA,
  THR_COMP_NEAR_NEWBA,
  THR_COMP_GLOBAL_GLOBALBA,

  // Intra modes, tried last.
  THR_DC,
  THR_PAETH,
  THR_SMOOTH,
  THR_SMOOTH_V,
  THR_SMOOTH_H,
  THR_H_PRED,
  THR_V_PRED,
  THR_D135_PRED,
  THR_D203_PRED,
  THR_D157_PRED,
  THR_D67_PRED,
  THR_D113_PRED,
  THR_D45_PRED,
};
298
299 /*!\cond */
// RD-search record for one single-reference inter mode / reference frame.
typedef struct SingleInterModeState {
  int64_t rd;                    // RD cost recorded for this mode/ref pair
  MV_REFERENCE_FRAME ref_frame;  // reference frame the rd value belongs to
  int valid;                     // nonzero once this entry has been filled in
} SingleInterModeState;
305
// Aggregated state carried across the inter mode RD search for one block:
// the best candidates found so far plus per-mode/per-reference caches used
// to prune later candidates.
typedef struct InterModeSearchState {
  int64_t best_rd;            // best overall RD cost so far
  int64_t best_skip_rd[2];    // best RD with skip; index semantics defined by
                              // the search code — confirm at the use sites
  MB_MODE_INFO best_mbmode;   // mode info of the current best candidate
  int best_rate_y;            // luma rate of the best candidate
  int best_rate_uv;           // chroma rate of the best candidate
  int best_mode_skippable;    // whether the best mode can skip the transform
  int best_skip2;
  THR_MODES best_mode_index;  // index of the best mode in the mode order
  int num_available_refs;
  int64_t dist_refs[REF_FRAMES];
  int dist_order_refs[REF_FRAMES];
  int64_t mode_threshold[MAX_MODES];  // per-mode RD pruning thresholds
  int64_t best_intra_rd;              // best RD among intra candidates
  unsigned int best_pred_sse;

  /*!
   * \brief Keep track of best intra rd for use in compound mode.
   */
  int64_t best_pred_rd[REFERENCE_MODES];
  int64_t best_pred_diff[REFERENCE_MODES];
  // Save a set of single_newmv for each checked ref_mv.
  int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
  // Model-estimated RD per mode / ref_mv index / reference frame.
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  // The rd of simple translation in single inter modes
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  int64_t best_single_rd[REF_FRAMES];        // best RD per single reference
  PREDICTION_MODE best_single_mode[REF_FRAMES];  // mode achieving that RD

  // Single search results by [directions][modes][reference frames]
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
  // Same layout as single_state, but filled from modelled (estimated) RD.
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
                                            [FWD_REFS];
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  IntraModeSearchState intra_search_state;  // nested intra-search bookkeeping
  RD_STATS best_y_rdcost;                   // luma RD stats of best candidate
} InterModeSearchState;
347 /*!\endcond */
348
// Reset the per-block-size linear inter-mode RD models of a tile so that a
// fresh set of samples can be accumulated before the next fit.
void av1_inter_mode_data_init(TileDataEnc *tile_data) {
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    InterModeRdModel *const model = &tile_data->inter_mode_rd_models[bsize];
    model->ready = 0;
    model->num = 0;
    model->dist_sum = 0;
    model->ld_sum = 0;
    model->sse_sum = 0;
    model->sse_sse_sum = 0;
    model->sse_ld_sum = 0;
  }
}
361
// Estimate the residue rate and distortion for a block of size `bsize` with
// prediction error `sse`, using the tile's fitted linear RD model.
// Returns 1 and fills the outputs when the model is ready, 0 otherwise.
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
                             int64_t sse, int *est_residue_cost,
                             int64_t *est_dist) {
  const InterModeRdModel *const model = &tile_data->inter_mode_rd_models[bsize];
  if (!model->ready) return 0;

  // Below the mean distortion the model predicts a free (skip-like) residue.
  if (sse < model->dist_mean) {
    *est_residue_cost = 0;
    *est_dist = sse;
    return 1;
  }

  *est_dist = (int64_t)round(model->dist_mean);
  const double est_ld = model->a * sse + model->b;
  // Clamp estimated rate cost by INT_MAX / 2.
  // TODO(angiebird@google.com): find better solution than clamping.
  if (fabs(est_ld) < 1e-2) {
    // A near-zero slope would blow up the division; saturate instead.
    *est_residue_cost = INT_MAX / 2;
  } else {
    const double est_residue_cost_dbl = (sse - model->dist_mean) / est_ld;
    if (est_residue_cost_dbl < 0) {
      *est_residue_cost = 0;
    } else {
      *est_residue_cost =
          (int)AOMMIN((int64_t)round(est_residue_cost_dbl), INT_MAX / 2);
    }
  }
  // A non-positive rate estimate means the block is effectively skippable.
  if (*est_residue_cost <= 0) {
    *est_residue_cost = 0;
    *est_dist = sse;
  }
  return 1;
}
395
// Fit (or refresh) the linear model ld = a * sse + b for each block size from
// the samples accumulated since the previous fit, then reset the accumulators.
// A first fit needs at least 200 samples; refreshes need at least 64.
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
  (void)rdmult;  // unused; kept for API compatibility
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    if (inter_mode_data_block_idx(bsize) == -1) continue;
    InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
    if ((md->ready == 0 && md->num < 200) || (md->ready == 1 && md->num < 64))
      continue;

    if (md->ready == 0) {
      // First fit: plain sample means.
      md->dist_mean = md->dist_sum / md->num;
      md->ld_mean = md->ld_sum / md->num;
      md->sse_mean = md->sse_sum / md->num;
      md->sse_sse_mean = md->sse_sse_sum / md->num;
      md->sse_ld_mean = md->sse_ld_sum / md->num;
    } else {
      // Refresh: blend the old means with the new batch (weight 3:1).
      const double factor = 3;
      md->dist_mean =
          (md->dist_mean * factor + (md->dist_sum / md->num)) / (factor + 1);
      md->ld_mean =
          (md->ld_mean * factor + (md->ld_sum / md->num)) / (factor + 1);
      md->sse_mean =
          (md->sse_mean * factor + (md->sse_sum / md->num)) / (factor + 1);
      md->sse_sse_mean =
          (md->sse_sse_mean * factor + (md->sse_sse_sum / md->num)) /
          (factor + 1);
      md->sse_ld_mean =
          (md->sse_ld_mean * factor + (md->sse_ld_sum / md->num)) /
          (factor + 1);
    }

    // Least-squares slope/intercept from the first and second moments.
    const double my = md->ld_mean;
    const double mx = md->sse_mean;
    const double dx = sqrt(md->sse_sse_mean);
    const double dxy = md->sse_ld_mean;

    md->a = (dxy - mx * my) / (dx * dx - mx * mx);
    md->b = my - md->a * mx;
    md->ready = 1;

    // Start a fresh accumulation window for the next fit.
    md->num = 0;
    md->dist_sum = 0;
    md->ld_sum = 0;
    md->sse_sum = 0;
    md->sse_sse_sum = 0;
    md->sse_ld_sum = 0;
  }
}
445
inter_mode_data_push(TileDataEnc * tile_data,BLOCK_SIZE bsize,int64_t sse,int64_t dist,int residue_cost)446 static AOM_INLINE void inter_mode_data_push(TileDataEnc *tile_data,
447 BLOCK_SIZE bsize, int64_t sse,
448 int64_t dist, int residue_cost) {
449 if (residue_cost == 0 || sse == dist) return;
450 const int block_idx = inter_mode_data_block_idx(bsize);
451 if (block_idx == -1) return;
452 InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
453 if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
454 const double ld = (sse - dist) * 1. / residue_cost;
455 ++rd_model->num;
456 rd_model->dist_sum += dist;
457 rd_model->ld_sum += ld;
458 rd_model->sse_sum += sse;
459 rd_model->sse_sse_sum += (double)sse * (double)sse;
460 rd_model->sse_ld_sum += sse * ld;
461 }
462 }
463
inter_modes_info_push(InterModesInfo * inter_modes_info,int mode_rate,int64_t sse,int64_t rd,RD_STATS * rd_cost,RD_STATS * rd_cost_y,RD_STATS * rd_cost_uv,const MB_MODE_INFO * mbmi)464 static AOM_INLINE void inter_modes_info_push(InterModesInfo *inter_modes_info,
465 int mode_rate, int64_t sse,
466 int64_t rd, RD_STATS *rd_cost,
467 RD_STATS *rd_cost_y,
468 RD_STATS *rd_cost_uv,
469 const MB_MODE_INFO *mbmi) {
470 const int num = inter_modes_info->num;
471 assert(num < MAX_INTER_MODES);
472 inter_modes_info->mbmi_arr[num] = *mbmi;
473 inter_modes_info->mode_rate_arr[num] = mode_rate;
474 inter_modes_info->sse_arr[num] = sse;
475 inter_modes_info->est_rd_arr[num] = rd;
476 inter_modes_info->rd_cost_arr[num] = *rd_cost;
477 inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
478 inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
479 ++inter_modes_info->num;
480 }
481
compare_rd_idx_pair(const void * a,const void * b)482 static int compare_rd_idx_pair(const void *a, const void *b) {
483 if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
484 // To avoid inconsistency in qsort() ordering when two elements are equal,
485 // using idx as tie breaker. Refer aomedia:2928
486 if (((RdIdxPair *)a)->idx == ((RdIdxPair *)b)->idx)
487 return 0;
488 else if (((RdIdxPair *)a)->idx > ((RdIdxPair *)b)->idx)
489 return 1;
490 else
491 return -1;
492 } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
493 return 1;
494 } else {
495 return -1;
496 }
497 }
498
inter_modes_info_sort(const InterModesInfo * inter_modes_info,RdIdxPair * rd_idx_pair_arr)499 static AOM_INLINE void inter_modes_info_sort(
500 const InterModesInfo *inter_modes_info, RdIdxPair *rd_idx_pair_arr) {
501 if (inter_modes_info->num == 0) {
502 return;
503 }
504 for (int i = 0; i < inter_modes_info->num; ++i) {
505 rd_idx_pair_arr[i].idx = i;
506 rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
507 }
508 qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
509 compare_rd_idx_pair);
510 }
511
512 // Similar to get_horver_correlation, but also takes into account first
513 // row/column, when computing horizontal/vertical correlation.
// Compute normalized horizontal (*hcorr) and vertical (*vcorr) correlation
// coefficients of the residual `diff` over a width x height block, including
// the first row/column (unlike get_horver_correlation).  Negative
// correlations are clipped to 0; degenerate (zero-variance) cases yield 1.0.
// NOTE: this is the C reference for an RTCD-dispatched function, so the
// accumulation order must stay bit-exact with the SIMD implementations.
void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
                                       int width, int height, float *hcorr,
                                       float *vcorr) {
  // The following notation is used:
  // x - current pixel
  // y - left neighbor pixel
  // z - top neighbor pixel
  int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
  int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
  int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;

  // First, process horizontal correlation on just the first row
  x_sum += diff[0];
  x2_sum += diff[0] * diff[0];
  x_firstrow += diff[0];
  x2_firstrow += diff[0] * diff[0];
  for (int j = 1; j < width; ++j) {
    const int16_t x = diff[j];
    const int16_t y = diff[j - 1];
    x_sum += x;
    x_firstrow += x;
    x2_sum += x * x;
    x2_firstrow += x * x;
    xy_sum += x * y;
  }

  // Process vertical correlation in the first column
  x_firstcol += diff[0];
  x2_firstcol += diff[0] * diff[0];
  for (int i = 1; i < height; ++i) {
    const int16_t x = diff[i * stride];
    const int16_t z = diff[(i - 1) * stride];
    x_sum += x;
    x_firstcol += x;
    x2_sum += x * x;
    x2_firstcol += x * x;
    xz_sum += x * z;
  }

  // Now process horiz and vert correlation through the rest unit
  for (int i = 1; i < height; ++i) {
    for (int j = 1; j < width; ++j) {
      const int16_t x = diff[i * stride + j];
      const int16_t y = diff[i * stride + j - 1];
      const int16_t z = diff[(i - 1) * stride + j];
      x_sum += x;
      x2_sum += x * x;
      xy_sum += x * y;
      xz_sum += x * z;
    }
  }

  // Accumulate last-row/last-column sums so they can be excluded below:
  // the final column has no right neighbor (horizontal pairs) and the final
  // row has no bottom neighbor (vertical pairs).
  for (int j = 0; j < width; ++j) {
    x_finalrow += diff[(height - 1) * stride + j];
    x2_finalrow +=
        diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
  }
  for (int i = 0; i < height; ++i) {
    x_finalcol += diff[i * stride + width - 1];
    x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
  }

  // Restrict each accumulator to exactly the pixels participating in the
  // horizontal (x,y) and vertical (x,z) pair populations.
  int64_t xhor_sum = x_sum - x_finalcol;
  int64_t xver_sum = x_sum - x_finalrow;
  int64_t y_sum = x_sum - x_firstcol;
  int64_t z_sum = x_sum - x_firstrow;
  int64_t x2hor_sum = x2_sum - x2_finalcol;
  int64_t x2ver_sum = x2_sum - x2_finalrow;
  int64_t y2_sum = x2_sum - x2_firstcol;
  int64_t z2_sum = x2_sum - x2_firstrow;

  // Number of horizontal / vertical neighbor pairs.
  const float num_hor = (float)(height * (width - 1));
  const float num_ver = (float)((height - 1) * width);

  // Scaled variances (n * var) and covariances of the pair populations.
  const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
  const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;

  const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
  const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;

  const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
  const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;

  // Pearson correlation, clipped to be non-negative; fall back to 1.0 when a
  // variance is non-positive (constant row/column or tiny block).
  if (xhor_var_n > 0 && y_var_n > 0) {
    *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
    *hcorr = *hcorr < 0 ? 0 : *hcorr;
  } else {
    *hcorr = 1.0;
  }
  if (xver_var_n > 0 && z_var_n > 0) {
    *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
    *vcorr = *vcorr < 0 ? 0 : *vcorr;
  } else {
    *vcorr = 1.0;
  }
}
610
// Sum the source-vs-prediction SSE over all coded planes of the current
// block; optionally report the luma-only SSE through |sse_y|.
static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
                       int64_t *sse_y) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  int64_t total_sse = 0;

  for (int plane = 0; plane < num_planes; ++plane) {
    // Stop at the first chroma plane when this block carries no chroma.
    if (plane && !xd->is_chroma_ref) break;
    const struct macroblock_plane *const p = &x->plane[plane];
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE plane_bsize =
        get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
    unsigned int plane_sse;
    cpi->ppi->fn_ptr[plane_bsize].vf(p->src.buf, p->src.stride, pd->dst.buf,
                                     pd->dst.stride, &plane_sse);
    total_sse += plane_sse;
    if (plane == 0 && sse_y != NULL) *sse_y = plane_sse;
  }
  // NOTE(review): result is scaled by 16 — presumably to match the fixed-point
  // scale expected by the caller's RD model; confirm against get_sse() users.
  return total_sse << 4;
}
634
// Sum of squared differences between quantized-then-dequantized coefficients
// and the originals.  Returns the error; *ssz receives the sum of squared
// original coefficients.
//
// Fix: widen to int64_t before multiplying.  The original computed
// `diff * diff` and `coeff[i] * coeff[i]` in 32-bit int, which is signed
// overflow (UB) for large coefficient magnitudes; the highbd variant below
// already multiplies in 64 bits.
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t error = 0, sqcoeff = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}
649
// Low-precision block error: sum of squared differences between the int16
// coefficient buffers over `block_size` entries.
//
// Fix: widen to int64_t before squaring.  int16 differences reach +/-65535,
// whose square (up to ~4.29e9) overflows 32-bit int — signed overflow is UB.
int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             intptr_t block_size) {
  int64_t error = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}
661
662 #if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth block error: squared error and squared-coefficient sums are
// accumulated in 64 bits, then rounded and shifted down by 2*(bd-8) so the
// result is on the same scale as the 8-bit path.
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  int64_t error = 0, sqcoeff = 0;
  const int shift = 2 * (bd - 8);
  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);

  *ssz = (sqcoeff + rounding) >> shift;
  return (error + rounding) >> shift;
}
683 #endif
684
// Decide whether a diagonal intra mode can be skipped: returns 1 when `mode`
// is one of the four off-axis diagonal modes and the best intra mode found so
// far is not one of its two angular neighbours, 0 otherwise.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D113_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D67_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D203_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D157_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default: return 0;
  }
}
701
// Rate cost of signalling an inter prediction mode.  Compound modes have a
// single table lookup; single-reference modes accumulate the cost of each
// binary decision (NEWMV? -> GLOBALMV? -> NEARESTMV/NEARMV) using the
// per-level context bits packed into `mode_context`.
static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
                       int16_t mode_context) {
  if (is_inter_compound_mode(mode)) {
    return mode_costs
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
  }

  assert(is_inter_mode(mode));

  int16_t ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) return mode_costs->newmv_mode_cost[ctx][0];

  int cost = mode_costs->newmv_mode_cost[ctx][1];
  ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  if (mode == GLOBALMV) return cost + mode_costs->zeromv_mode_cost[ctx][0];

  cost += mode_costs->zeromv_mode_cost[ctx][1];
  ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
  return cost + mode_costs->refmv_mode_cost[ctx][mode != NEARESTMV];
}
732
get_single_mode(PREDICTION_MODE this_mode,int ref_idx)733 static INLINE PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
734 int ref_idx) {
735 return ref_idx ? compound_ref1_mode(this_mode)
736 : compound_ref0_mode(this_mode);
737 }
738
// Fill in the rate cost of signalling each possible reference-frame choice
// for the current block.  ref_costs_single[ref] is the cost of coding `ref`
// as a single reference; ref_costs_comp[ref0][ref1] the cost of the compound
// pair.  Costs are derived from the neighbour-based contexts in |xd| and the
// entropy tables in |mode_costs|.  If the segment pins the reference frame,
// all costs are zero since nothing needs to be signalled.
static AOM_INLINE void estimate_ref_frame_costs(
    const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
    int segment_id, unsigned int *ref_costs_single,
    unsigned int (*ref_costs_comp)[REF_FRAMES]) {
  int seg_ref_active =
      segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  if (seg_ref_active) {
    // Reference frame is dictated by the segment: signalling is free.
    memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
    int ref_frame;
    for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
      memset(ref_costs_comp[ref_frame], 0,
             REF_FRAMES * sizeof((*ref_costs_comp)[0]));
  } else {
    // Every inter choice first pays the intra/inter flag cost.
    int intra_inter_ctx = av1_get_intra_inter_context(xd);
    ref_costs_single[INTRA_FRAME] =
        mode_costs->intra_inter_cost[intra_inter_ctx][0];
    unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];

    for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
      ref_costs_single[i] = base_cost;

    const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
    const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
    const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
    const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
    const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
    const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);

    // Determine cost of a single ref frame, where frame types are represented
    // by a tree:
    // Level 0: add cost whether this ref is a forward or backward ref
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p1][0][1];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];

    // Level 1: if this ref is forward ref,
    // add cost whether it is last/last2 or last3/golden
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];

    // Level 1: if this ref is backward ref
    // then add cost whether this ref is altref or backward ref
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p2][1][0];
    ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];

    // Level 2: further add cost whether this ref is last or last2
    ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
    ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];

    // Level 2: last3 or golden
    ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
    ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];

    // Level 2: bwdref or altref2
    ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
    ref_costs_single[ALTREF2_FRAME] +=
        mode_costs->single_ref_cost[ctx_p6][5][1];

    if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
      // Similar to single ref, determine cost of compound ref frames.
      // cost_compound_refs = cost_first_ref + cost_second_ref
      const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
      const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
      const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
      const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
      const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);

      const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
      unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };

      // Bidirectional pairs pay the "not unidirectional" comp-ref-type bit.
      ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
          ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
              base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
      ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
      ref_bicomp_costs[ALTREF_FRAME] = 0;

      // cost of first ref frame
      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];

      ref_bicomp_costs[LAST_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
      ref_bicomp_costs[LAST2_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];

      ref_bicomp_costs[LAST3_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
      ref_bicomp_costs[GOLDEN_FRAME] +=
          mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];

      // cost of second ref frame
      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
      ref_bicomp_costs[ALTREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];

      ref_bicomp_costs[BWDREF_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
      ref_bicomp_costs[ALTREF2_FRAME] +=
          mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];

      // cost: if one ref frame is forward ref, the other ref is backward ref
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
          ref_costs_comp[ref0][ref1] =
              ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
        }
      }

      // cost: if both ref frames are the same side.
      const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
      const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
      const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
          base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
          mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
    } else {
      // Compound prediction is disabled for this frame: fill the compound
      // tables with a nominal cost so they are defined but never preferred.
      int ref0, ref1;
      for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
        for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
          ref_costs_comp[ref0][ref1] = 512;
      }
      ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
      ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
      ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
    }
  }
}
900
store_coding_context(MACROBLOCK * x,PICK_MODE_CONTEXT * ctx,int mode_index,int64_t comp_pred_diff[REFERENCE_MODES],int skippable)901 static AOM_INLINE void store_coding_context(
902 #if CONFIG_INTERNAL_STATS
903 MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
904 #else
905 MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
906 #endif // CONFIG_INTERNAL_STATS
907 int64_t comp_pred_diff[REFERENCE_MODES], int skippable) {
908 MACROBLOCKD *const xd = &x->e_mbd;
909
910 // Take a snapshot of the coding context so it can be
911 // restored if we decide to encode this way
912 ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
913 ctx->skippable = skippable;
914 #if CONFIG_INTERNAL_STATS
915 ctx->best_mode_index = mode_index;
916 #endif // CONFIG_INTERNAL_STATS
917 ctx->mic = *xd->mi[0];
918 av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
919 av1_ref_frame_type(xd->mi[0]->ref_frame));
920 ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
921 ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
922 ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
923 }
924
setup_buffer_ref_mvs_inter(const AV1_COMP * const cpi,MACROBLOCK * x,MV_REFERENCE_FRAME ref_frame,BLOCK_SIZE block_size,struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE])925 static AOM_INLINE void setup_buffer_ref_mvs_inter(
926 const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
927 BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
928 const AV1_COMMON *cm = &cpi->common;
929 const int num_planes = av1_num_planes(cm);
930 const YV12_BUFFER_CONFIG *scaled_ref_frame =
931 av1_get_scaled_ref_frame(cpi, ref_frame);
932 MACROBLOCKD *const xd = &x->e_mbd;
933 MB_MODE_INFO *const mbmi = xd->mi[0];
934 MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
935 const struct scale_factors *const sf =
936 get_ref_scale_factors_const(cm, ref_frame);
937 const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
938 assert(yv12 != NULL);
939
940 if (scaled_ref_frame) {
941 // Setup pred block based on scaled reference, because av1_mv_pred() doesn't
942 // support scaling.
943 av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
944 num_planes);
945 } else {
946 av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
947 }
948
949 // Gets an initial list of candidate vectors from neighbours and orders them
950 av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
951 xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
952 mbmi_ext->mode_context);
953 // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
954 // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
955 av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
956 // Further refinement that is encode side only to test the top few candidates
957 // in full and choose the best as the center point for subsequent searches.
958 // The current implementation doesn't support scaling.
959 av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
960 ref_frame, block_size);
961
962 // Go back to unscaled reference.
963 if (scaled_ref_frame) {
964 // We had temporarily setup pred block based on scaled reference above. Go
965 // back to unscaled reference now, for subsequent use.
966 av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
967 }
968 }
969
970 #define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
971 #define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
972
973 // TODO(jingning): this mv clamping function should be block size dependent.
clamp_mv2(MV * mv,const MACROBLOCKD * xd)974 static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
975 const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
976 xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
977 xd->mb_to_top_edge - LEFT_TOP_MARGIN,
978 xd->mb_to_bottom_edge +
979 RIGHT_BOTTOM_MARGIN };
980 clamp_mv(mv, &mv_limits);
981 }
982
983 /* If the current mode shares the same mv with other modes with higher cost,
984 * skip this mode. */
skip_repeated_mv(const AV1_COMMON * const cm,const MACROBLOCK * const x,PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME ref_frames[2],InterModeSearchState * search_state)985 static int skip_repeated_mv(const AV1_COMMON *const cm,
986 const MACROBLOCK *const x,
987 PREDICTION_MODE this_mode,
988 const MV_REFERENCE_FRAME ref_frames[2],
989 InterModeSearchState *search_state) {
990 const int is_comp_pred = ref_frames[1] > INTRA_FRAME;
991 const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
992 const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
993 const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
994 PREDICTION_MODE compare_mode = MB_MODE_COUNT;
995 if (!is_comp_pred) {
996 if (this_mode == NEARMV) {
997 if (ref_mv_count == 0) {
998 // NEARMV has the same motion vector as NEARESTMV
999 compare_mode = NEARESTMV;
1000 }
1001 if (ref_mv_count == 1 &&
1002 cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1003 // NEARMV has the same motion vector as GLOBALMV
1004 compare_mode = GLOBALMV;
1005 }
1006 }
1007 if (this_mode == GLOBALMV) {
1008 if (ref_mv_count == 0 &&
1009 cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1010 // GLOBALMV has the same motion vector as NEARESTMV
1011 compare_mode = NEARESTMV;
1012 }
1013 if (ref_mv_count == 1) {
1014 // GLOBALMV has the same motion vector as NEARMV
1015 compare_mode = NEARMV;
1016 }
1017 }
1018
1019 if (compare_mode != MB_MODE_COUNT) {
1020 // Use modelled_rd to check whether compare mode was searched
1021 if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] !=
1022 INT64_MAX) {
1023 const int16_t mode_ctx =
1024 av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
1025 const int compare_cost =
1026 cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
1027 const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);
1028
1029 // Only skip if the mode cost is larger than compare mode cost
1030 if (this_cost > compare_cost) {
1031 search_state->modelled_rd[this_mode][0][ref_frames[0]] =
1032 search_state->modelled_rd[compare_mode][0][ref_frames[0]];
1033 return 1;
1034 }
1035 }
1036 }
1037 }
1038 return 0;
1039 }
1040
clamp_and_check_mv(int_mv * out_mv,int_mv in_mv,const AV1_COMMON * cm,const MACROBLOCK * x)1041 static INLINE int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
1042 const AV1_COMMON *cm,
1043 const MACROBLOCK *x) {
1044 const MACROBLOCKD *const xd = &x->e_mbd;
1045 *out_mv = in_mv;
1046 lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
1047 cm->features.cur_frame_force_integer_mv);
1048 clamp_mv2(&out_mv->as_mv, xd);
1049 return av1_is_fullmv_in_range(&x->mv_limits,
1050 get_fullmv_from_mv(&out_mv->as_mv));
1051 }
1052
1053 // To use single newmv directly for compound modes, need to clamp the mv to the
1054 // valid mv range. Without this, encoder would generate out of range mv, and
1055 // this is seen in 8k encoding.
clamp_mv_in_range(MACROBLOCK * const x,int_mv * mv,int ref_idx)1056 static INLINE void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
1057 int ref_idx) {
1058 const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
1059 SubpelMvLimits mv_limits;
1060
1061 av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
1062 clamp_mv(&mv->as_mv, &mv_limits);
1063 }
1064
// Determines the NEWMV motion vector(s) for the current mode and computes
// their rate cost into *rate_mv.
// - Compound modes reuse single-reference NEWMV results cached in args
//   (clamped into valid range) instead of re-searching.
// - Single-reference modes run av1_single_motion_search(), optionally with a
//   reduced search range derived from previously searched ref_mv candidates.
// Returns INT64_MAX when the search fails or the mode should be skipped,
// 0 on success.
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
                            int *const rate_mv, HandleInterModeArgs *const args,
                            inter_mode_info *mode_info) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  const int ref_mv_idx = mbmi->ref_mv_idx;

  if (is_comp_pred) {
    const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
    const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
    if (this_mode == NEW_NEWMV) {
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      *rate_mv = 0;
      for (int i = 0; i < 2; ++i) {
        const int_mv ref_mv = av1_get_ref_mv(x, i);
        *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
                                    x->mv_costs->nmv_joint_cost,
                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
      }
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 1);
      *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    } else {
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 0);
      *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    }
  } else {
    // Single ref case.
    const int ref_idx = 0;
    int search_range = INT_MAX;

    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
      int min_mv_diff = INT_MAX;
      int best_match = -1;
      MV prev_ref_mv[2] = { { 0 } };
      // Find the previously searched ref_mv candidate closest to this one.
      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
                                                     idx, &x->mbmi_ext)
                               .as_mv;
        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
                                       abs(ref_mv.col - prev_ref_mv[idx].col));

        if (min_mv_diff > ref_mv_diff) {
          min_mv_diff = ref_mv_diff;
          best_match = idx;
        }
      }

      if (min_mv_diff < (16 << 3)) {
        if (args->single_newmv_valid[best_match][refs[0]]) {
          // Bound the search by the ref_mv gap plus how far the best match's
          // NEWMV landed from its own ref_mv.
          search_range = min_mv_diff;
          search_range +=
              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
                         prev_ref_mv[best_match].row),
                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
                         prev_ref_mv[best_match].col));
          // Get full pixel search range.
          search_range = (search_range + 4) >> 3;
        }
      }
    }

    int_mv best_mv;
    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
                             mode_info, &best_mv, args);
    if (best_mv.as_int == INVALID_MV) return INT64_MAX;

    // Cache the result so later compound modes can reuse it.
    args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
    cur_mv[0].as_int = best_mv.as_int;

    // Return after single_newmv is set.
    if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
  }

  return 0;
}
1169
update_mode_start_end_index(const AV1_COMP * const cpi,int * mode_index_start,int * mode_index_end,int last_motion_mode_allowed,int interintra_allowed,int eval_motion_mode)1170 static INLINE void update_mode_start_end_index(const AV1_COMP *const cpi,
1171 int *mode_index_start,
1172 int *mode_index_end,
1173 int last_motion_mode_allowed,
1174 int interintra_allowed,
1175 int eval_motion_mode) {
1176 *mode_index_start = (int)SIMPLE_TRANSLATION;
1177 *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
1178 if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
1179 if (!eval_motion_mode) {
1180 *mode_index_end = (int)SIMPLE_TRANSLATION;
1181 } else {
1182 // Set the start index appropriately to process motion modes other than
1183 // simple translation
1184 *mode_index_start = 1;
1185 }
1186 }
1187 }
1188
1189 /*!\brief AV1 motion mode search
1190 *
1191 * \ingroup inter_mode_search
1192 * Function to search over and determine the motion mode. It will update
1193 * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
1194 * WARPED_CAUSAL and determine any necessary side information for the selected
1195 * motion mode. It will also perform the full transform search, unless the
1196 * input parameter do_tx_search indicates to do an estimation of the RD rather
1197 * than an RD corresponding to a full transform search. It will return the
1198 * RD for the final motion_mode.
1199 * Do the RD search for a given inter mode and compute all information relevant
1200 * to the input mode. It will compute the best MV,
1201 * compound parameters (if the mode is a compound mode) and interpolation filter
1202 * parameters.
1203 *
1204 * \param[in] cpi Top-level encoder structure.
1205 * \param[in] tile_data Pointer to struct holding adaptive
1206 * data/contexts/models for the tile during
1207 * encoding.
1208 * \param[in] x Pointer to struct holding all the data for
1209 * the current macroblock.
1210 * \param[in] bsize Current block size.
1211 * \param[in,out] rd_stats Struct to keep track of the overall RD
1212 * information.
1213 * \param[in,out] rd_stats_y Struct to keep track of the RD information
1214 * for only the Y plane.
1215 * \param[in,out] rd_stats_uv Struct to keep track of the RD information
1216 * for only the UV planes.
1217 * \param[in] args HandleInterModeArgs struct holding
1218 * miscellaneous arguments for inter mode
1219 * search. See the documentation for this
1220 * struct for a description of each member.
1221 * \param[in] ref_best_rd Best RD found so far for this block.
1222 * It is used for early termination of this
1223 * search if the RD exceeds this value.
1224 * \param[in,out] ref_skip_rd A length 2 array, where skip_rd[0] is the
1225 * best total RD for a skip mode so far, and
1226 * skip_rd[1] is the best RD for a skip mode so
1227 * far in luma. This is used as a speed feature
1228 * to skip the transform search if the computed
1229 * skip RD for the current mode is not better
1230 * than the best skip_rd so far.
1231 * \param[in,out] rate_mv The rate associated with the motion vectors.
1232 * This will be modified if a motion search is
1233 * done in the motion mode search.
1234 * \param[in,out] orig_dst A prediction buffer to hold a computed
1235 * prediction. This will eventually hold the
1236 * final prediction, and the tmp_dst info will
1237 * be copied here.
1238 * \param[in,out] best_est_rd Estimated RD for motion mode search if
1239 * do_tx_search (see below) is 0.
1240 * \param[in] do_tx_search Parameter to indicate whether or not to do
1241 * a full transform search. This will compute
1242 * an estimated RD for the modes without the
1243 * transform search and later perform the full
1244 * transform search on the best candidates.
1245 * \param[in] inter_modes_info InterModesInfo struct to hold inter mode
1246 * information to perform a full transform
1247 * search only on winning candidates searched
1248 * with an estimate for transform coding RD.
 * \param[in]     eval_motion_mode  Boolean whether or not to evaluate
 *                                  motion modes other than SIMPLE_TRANSLATION.
1251 * \param[out] yrd Stores the rdcost corresponding to encoding
1252 * the luma plane.
1253 * \return Returns INT64_MAX if the determined motion mode is invalid and the
1254 * current motion mode being tested should be skipped. It returns 0 if the
1255 * motion mode search is a success.
1256 */
static int64_t motion_mode_rd(
    const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
    BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
    RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
    int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
    int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
    int eval_motion_mode, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  const int rate2_nocoeff = rd_stats->rate;
  int best_xskip_txfm = 0;
  RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int rate_mv0 = *rate_mv;
  const int interintra_allowed = cm->seq_params->enable_interintra_compound &&
                                 is_interintra_allowed(mbmi) &&
                                 mbmi->compound_idx;
  WARP_SAMPLE_INFO *const warp_sample_info =
      &x->warp_sample_info[mbmi->ref_frame[0]];
  int *pts0 = warp_sample_info->pts;
  int *pts_inref0 = warp_sample_info->pts_inref;

  assert(mbmi->ref_frame[1] != INTRA_FRAME);
  const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
  av1_invalid_rd_stats(&best_rd_stats);
  mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
  MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
  *yrd = INT64_MAX;
  if (features->switchable_motion_mode) {
    // Determine which motion modes to search if more than SIMPLE_TRANSLATION
    // is allowed.
    last_motion_mode_allowed = motion_mode_allowed(
        xd->global_motion, xd, mbmi, features->allow_warped_motion);
  }

  if (last_motion_mode_allowed == WARPED_CAUSAL) {
    // Collect projection samples used in least squares approximation of
    // the warped motion parameters if WARPED_CAUSAL is going to be searched.
    if (warp_sample_info->num < 0) {
      warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
    }
    mbmi->num_proj_ref = warp_sample_info->num;
  }
  const int total_samples = mbmi->num_proj_ref;
  if (total_samples == 0) {
    // Do not search WARPED_CAUSAL if there are no samples to use to determine
    // warped parameters.
    last_motion_mode_allowed = OBMC_CAUSAL;
  }

  const MB_MODE_INFO base_mbmi = *mbmi;
  MB_MODE_INFO best_mbmi;
  const int interp_filter = features->interp_filter;
  const int switchable_rate =
      av1_is_interp_needed(xd)
          ? av1_get_switchable_rate(x, xd, interp_filter,
                                    cm->seq_params->enable_dual_filter)
          : 0;
  int64_t best_rd = INT64_MAX;
  int best_rate_mv = rate_mv0;
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  int mode_index_start, mode_index_end;
  // Modify the start and end index according to speed features. For example,
  // if SIMPLE_TRANSLATION has already been searched according to
  // the motion_mode_for_winner_cand speed feature, update the mode_index_start
  // to avoid searching it again.
  update_mode_start_end_index(cpi, &mode_index_start, &mode_index_end,
                              last_motion_mode_allowed, interintra_allowed,
                              eval_motion_mode);
  // Main function loop. This loops over all of the possible motion modes and
  // computes RD to determine the best one. This process includes computing
  // any necessary side information for the motion mode and performing the
  // transform search.
  for (int mode_index = mode_index_start; mode_index <= mode_index_end;
       mode_index++) {
    if (args->skip_motion_mode && mode_index) continue;
    int tmp_rate2 = rate2_nocoeff;
    const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
    int tmp_rate_mv = rate_mv0;

    *mbmi = base_mbmi;
    if (is_interintra_mode) {
      // Only use SIMPLE_TRANSLATION for interintra
      mbmi->motion_mode = SIMPLE_TRANSLATION;
    } else {
      mbmi->motion_mode = (MOTION_MODE)mode_index;
      assert(mbmi->ref_frame[1] != INTRA_FRAME);
    }

    // Do not search OBMC if the probability of selecting it is below a
    // predetermined threshold for this update_type and block size.
    const FRAME_UPDATE_TYPE update_type =
        get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
    const int prune_obmc =
        cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
        cpi->sf.inter_sf.prune_obmc_prob_thresh;
    if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) &&
        mbmi->motion_mode == OBMC_CAUSAL)
      continue;

    if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
      // SIMPLE_TRANSLATION mode: no need to recalculate.
      // The prediction is calculated before motion_mode_rd() is called in
      // handle_inter_mode()
    } else if (mbmi->motion_mode == OBMC_CAUSAL) {
      const uint32_t cur_mv = mbmi->mv[0].as_int;
      // OBMC_CAUSAL not allowed for compound prediction
      assert(!is_comp_pred);
      if (have_newmv_in_inter_mode(this_mode)) {
        av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
                                 &mbmi->mv[0], NULL);
        tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
      }
      if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
        // Build the predictor according to the current motion vector if it has
        // not already been built
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                      0, av1_num_planes(cm) - 1);
      }
      // Build the inter predictor by blending the predictor corresponding to
      // this MV, and the neighboring blocks using the OBMC model
      av1_build_obmc_inter_prediction(
          cm, xd, args->above_pred_buf, args->above_pred_stride,
          args->left_pred_buf, args->left_pred_stride);
#if !CONFIG_REALTIME_ONLY
    } else if (mbmi->motion_mode == WARPED_CAUSAL) {
      int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
      mbmi->motion_mode = WARPED_CAUSAL;
      mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
      mbmi->interp_filters =
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));

      memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
      memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
      // Select the samples according to motion vector difference
      if (mbmi->num_proj_ref > 1) {
        mbmi->num_proj_ref = av1_selectSamples(
            &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
      }

      // Compute the warped motion parameters with a least squares fit
      // using the collected samples
      if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
                               mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
                               &mbmi->wm_params, mi_row, mi_col)) {
        assert(!is_comp_pred);
        if (have_newmv_in_inter_mode(this_mode)) {
          // Refine MV for NEWMV mode
          const int_mv mv0 = mbmi->mv[0];
          const WarpedMotionParams wm_params0 = mbmi->wm_params;
          const int num_proj_ref0 = mbmi->num_proj_ref;

          const int_mv ref_mv = av1_get_ref_mv(x, 0);
          SUBPEL_MOTION_SEARCH_PARAMS ms_params;
          av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
                                            &ref_mv.as_mv, NULL);

          // Refine MV in a small range.
          av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
                               total_samples);

          if (mv0.as_int != mbmi->mv[0].as_int) {
            // Keep the refined MV and WM parameters.
            tmp_rate_mv = av1_mv_bit_cost(
                &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
                x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
            tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
          } else {
            // Restore the old MV and WM parameters.
            mbmi->mv[0] = mv0;
            mbmi->wm_params = wm_params0;
            mbmi->num_proj_ref = num_proj_ref0;
          }
        }

        // Build the warped predictor
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                      av1_num_planes(cm) - 1);
      } else {
        continue;
      }
#endif  // !CONFIG_REALTIME_ONLY
    } else if (is_interintra_mode) {
      const int ret =
          av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
                                      &tmp_rate_mv, &tmp_rate2, orig_dst);
      if (ret < 0) continue;
    }

    // If we are searching newmv and the mv is the same as refmv, skip the
    // current mode
    if (!av1_check_newmv_joint_nonzero(cm, x)) continue;

    // Update rd_stats for the current motion mode
    txfm_info->skip_txfm = 0;
    rd_stats->dist = 0;
    rd_stats->sse = 0;
    rd_stats->skip_txfm = 1;
    rd_stats->rate = tmp_rate2;
    const ModeCosts *mode_costs = &x->mode_costs;
    if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
    if (interintra_allowed) {
      rd_stats->rate +=
          mode_costs->interintra_cost[size_group_lookup[bsize]]
                                     [mbmi->ref_frame[1] == INTRA_FRAME];
    }
    if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
        (mbmi->ref_frame[1] != INTRA_FRAME)) {
      if (last_motion_mode_allowed == WARPED_CAUSAL) {
        rd_stats->rate +=
            mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
      } else {
        rd_stats->rate +=
            mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
      }
    }

    int64_t this_yrd = INT64_MAX;

    if (!do_tx_search) {
      // Avoid doing a transform search here to speed up the overall mode
      // search. It will be done later in the mode search if the current
      // motion mode seems promising.
      int64_t curr_sse = -1;
      int64_t sse_y = -1;
      int est_residue_cost = 0;
      int64_t est_dist = 0;
      int64_t est_rd = 0;
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        curr_sse = get_sse(cpi, x, &sse_y);
        const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
                                                 &est_residue_cost, &est_dist);
        (void)has_est_rd;
        assert(has_est_rd);
      } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
                 cpi->sf.rt_sf.use_nonrd_pick_mode) {
        model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
            cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
            NULL, &curr_sse, NULL, NULL, NULL);
        sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
      }
      est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
      // Early termination: even with a 20% optimism margin, the estimated RD
      // cannot beat the best estimate so far.
      if (est_rd * 0.80 > *best_est_rd) {
        mbmi->ref_frame[1] = ref_frame_1;
        continue;
      }
      const int mode_rate = rd_stats->rate;
      rd_stats->rate += est_residue_cost;
      rd_stats->dist = est_dist;
      rd_stats->rdcost = est_rd;
      if (rd_stats->rdcost < *best_est_rd) {
        *best_est_rd = rd_stats->rdcost;
        assert(sse_y >= 0);
        ref_skip_rd[1] = cpi->sf.inter_sf.txfm_rd_gate_level
                             ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
                             : INT64_MAX;
      }
      if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
        if (!is_comp_pred) {
          assert(curr_sse >= 0);
          inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                                rd_stats->rdcost, rd_stats, rd_stats_y,
                                rd_stats_uv, mbmi);
        }
      } else {
        assert(curr_sse >= 0);
        inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
                              rd_stats->rdcost, rd_stats, rd_stats_y,
                              rd_stats_uv, mbmi);
      }
      mbmi->skip_txfm = 0;
    } else {
      // Perform full transform search
      int64_t skip_rd = INT64_MAX;
      int64_t skip_rdy = INT64_MAX;
      if (cpi->sf.inter_sf.txfm_rd_gate_level) {
        // Check if the mode is good enough based on skip RD
        int64_t sse_y = INT64_MAX;
        int64_t curr_sse = get_sse(cpi, x, &sse_y);
        skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
        skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
        int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
                                        cpi->sf.inter_sf.txfm_rd_gate_level, 0);
        if (!eval_txfm) continue;
      }

      // Do transform search
      const int mode_rate = rd_stats->rate;
      if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
                           rd_stats->rate, ref_best_rd)) {
        if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
          return INT64_MAX;
        }
        continue;
      }
      const int skip_ctx = av1_get_skip_txfm_context(xd);
      const int y_rate =
          rd_stats->skip_txfm
              ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);

      const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
      if (curr_rd < ref_best_rd) {
        ref_best_rd = curr_rd;
        ref_skip_rd[0] = skip_rd;
        ref_skip_rd[1] = skip_rdy;
      }
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
            rd_stats_y->rate + rd_stats_uv->rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }

    if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
      if (is_nontrans_global_motion(xd, xd->mi[0])) {
        mbmi->interp_filters =
            av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
      }
    }

    const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
    if (mode_index == 0) {
      args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
    }
    if (mode_index == 0 || tmp_rd < best_rd) {
      // Update best_rd data if this is the best motion mode so far
      best_mbmi = *mbmi;
      best_rd = tmp_rd;
      best_rd_stats = *rd_stats;
      best_rd_stats_y = *rd_stats_y;
      best_rate_mv = tmp_rate_mv;
      *yrd = this_yrd;
      if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
      memcpy(best_blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
      best_xskip_txfm = mbmi->skip_txfm;
    }
  }
  // Update RD and mbmi stats for selected motion mode
  mbmi->ref_frame[1] = ref_frame_1;
  *rate_mv = best_rate_mv;
  if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
    av1_invalid_rd_stats(rd_stats);
    restore_dst_buf(xd, *orig_dst, num_planes);
    return INT64_MAX;
  }
  *mbmi = best_mbmi;
  *rd_stats = best_rd_stats;
  *rd_stats_y = best_rd_stats_y;
  if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
  txfm_info->skip_txfm = best_xskip_txfm;

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1627
// Computes the RD cost of coding this block in skip mode: the inter
// prediction is built for every plane, distortion is the (scaled) SSE of the
// residual, and the only rate charged is the skip-mode flag itself.
// Always returns 0; results are reported through rd_stats.
static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
                            MACROBLOCK *const x, BLOCK_SIZE bsize,
                            const BUFFER_SET *const orig_dst) {
  assert(bsize < BLOCK_SIZES_ALL);
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  // Build predictors for all available planes in one call.
  av1_enc_build_inter_predictor(cm, xd, xd->mi_row, xd->mi_col, orig_dst,
                                bsize, 0, num_planes - 1);

  int64_t sse_total = 0;
  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblock_plane *const p = &x->plane[plane];
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE plane_bsize =
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
    const int bw = block_size_wide[plane_bsize];
    const int bh = block_size_high[plane_bsize];

    // src_diff = source - prediction for this plane.
    av1_subtract_plane(x, plane_bsize, plane);
    // Shift by 4 to match the distortion scale used by RDCOST elsewhere.
    sse_total += aom_sum_squares_2d_i16(p->src_diff, bw, bw, bh) << 4;
  }

  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
  rd_stats->dist = rd_stats->sse = sse_total;
  rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];
  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1661
1662 // Check NEARESTMV, NEARMV, GLOBALMV ref mvs for duplicate and skip the relevant
1663 // mode
check_repeat_ref_mv(const MB_MODE_INFO_EXT * mbmi_ext,int ref_idx,const MV_REFERENCE_FRAME * ref_frame,PREDICTION_MODE single_mode)1664 static INLINE int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
1665 int ref_idx,
1666 const MV_REFERENCE_FRAME *ref_frame,
1667 PREDICTION_MODE single_mode) {
1668 const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1669 const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1670 assert(single_mode != NEWMV);
1671 if (single_mode == NEARESTMV) {
1672 return 0;
1673 } else if (single_mode == NEARMV) {
1674 // when ref_mv_count = 0, NEARESTMV and NEARMV are same as GLOBALMV
1675 // when ref_mv_count = 1, NEARMV is same as GLOBALMV
1676 if (ref_mv_count < 2) return 1;
1677 } else if (single_mode == GLOBALMV) {
1678 // when ref_mv_count == 0, GLOBALMV is same as NEARESTMV
1679 if (ref_mv_count == 0) return 1;
1680 // when ref_mv_count == 1, NEARMV is same as GLOBALMV
1681 else if (ref_mv_count == 1)
1682 return 0;
1683
1684 int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
1685 // Check GLOBALMV is matching with any mv in ref_mv_stack
1686 for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
1687 int_mv this_mv;
1688
1689 if (ref_idx == 0)
1690 this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
1691 else
1692 this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
1693
1694 if (this_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
1695 return 1;
1696 }
1697 }
1698 return 0;
1699 }
1700
get_this_mv(int_mv * this_mv,PREDICTION_MODE this_mode,int ref_idx,int ref_mv_idx,int skip_repeated_ref_mv,const MV_REFERENCE_FRAME * ref_frame,const MB_MODE_INFO_EXT * mbmi_ext)1701 static INLINE int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
1702 int ref_idx, int ref_mv_idx,
1703 int skip_repeated_ref_mv,
1704 const MV_REFERENCE_FRAME *ref_frame,
1705 const MB_MODE_INFO_EXT *mbmi_ext) {
1706 const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1707 assert(is_inter_singleref_mode(single_mode));
1708 if (single_mode == NEWMV) {
1709 this_mv->as_int = INVALID_MV;
1710 } else if (single_mode == GLOBALMV) {
1711 if (skip_repeated_ref_mv &&
1712 check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1713 return 0;
1714 *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1715 } else {
1716 assert(single_mode == NEARMV || single_mode == NEARESTMV);
1717 const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1718 const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
1719 if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
1720 assert(ref_mv_offset >= 0);
1721 if (ref_idx == 0) {
1722 *this_mv =
1723 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
1724 } else {
1725 *this_mv =
1726 mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
1727 }
1728 } else {
1729 if (skip_repeated_ref_mv &&
1730 check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1731 return 0;
1732 *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1733 }
1734 }
1735 return 1;
1736 }
1737
1738 // Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv list
1739 // population
skip_nearest_near_mv_using_refmv_weight(const MACROBLOCK * const x,const PREDICTION_MODE this_mode,const int8_t ref_frame_type)1740 static INLINE int skip_nearest_near_mv_using_refmv_weight(
1741 const MACROBLOCK *const x, const PREDICTION_MODE this_mode,
1742 const int8_t ref_frame_type) {
1743 if (this_mode != NEARESTMV && this_mode != NEARMV) return 0;
1744
1745 const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1746 const uint16_t *const ref_mv_weight = mbmi_ext->weight[ref_frame_type];
1747 const int ref_mv_count =
1748 AOMMIN(MAX_REF_MV_SEARCH, mbmi_ext->ref_mv_count[ref_frame_type]);
1749
1750 if (ref_mv_count == 0) return 0;
1751 // If ref mv list has atleast one nearest candidate do not prune NEARESTMV
1752 if (this_mode == NEARESTMV && ref_mv_weight[0] >= REF_CAT_LEVEL) return 0;
1753
1754 // Count number of ref mvs populated from nearest candidates
1755 int nearest_refmv_count = 0;
1756 for (int ref_mv_idx = 0; ref_mv_idx < ref_mv_count; ref_mv_idx++) {
1757 if (ref_mv_weight[ref_mv_idx] >= REF_CAT_LEVEL) nearest_refmv_count++;
1758 }
1759
1760 // nearest_refmv_count indicates the closeness of block motion characteristics
1761 // with respect to its spatial neighbor. Lower value of nearest_refmv_count
1762 // means less correlation with its spatial neighbors. Hence less possibility
1763 // for NEARESTMV and NEARMV modes becoming the best mode since these modes
1764 // work well for blocks that shares similar motion characteristics with its
1765 // neighbor. Thus, when nearest_refmv_count is less w.r.t ref_mv_count prune
1766 // the mode.
1767 const int prune_thresh = 1 + (ref_mv_count >= 2);
1768 if (nearest_refmv_count < prune_thresh) return 1;
1769 return 0;
1770 }
1771
1772 // This function update the non-new mv for the current prediction mode
build_cur_mv(int_mv * cur_mv,PREDICTION_MODE this_mode,const AV1_COMMON * cm,const MACROBLOCK * x,int skip_repeated_ref_mv)1773 static INLINE int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
1774 const AV1_COMMON *cm, const MACROBLOCK *x,
1775 int skip_repeated_ref_mv) {
1776 const MACROBLOCKD *xd = &x->e_mbd;
1777 const MB_MODE_INFO *mbmi = xd->mi[0];
1778 const int is_comp_pred = has_second_ref(mbmi);
1779
1780 int ret = 1;
1781 for (int i = 0; i < is_comp_pred + 1; ++i) {
1782 int_mv this_mv;
1783 this_mv.as_int = INVALID_MV;
1784 ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
1785 skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
1786 if (!ret) return 0;
1787 const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
1788 if (single_mode == NEWMV) {
1789 const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1790 cur_mv[i] =
1791 (i == 0) ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
1792 .this_mv
1793 : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
1794 .comp_mv;
1795 } else {
1796 ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
1797 }
1798 }
1799 return ret;
1800 }
1801
get_drl_cost(const MB_MODE_INFO * mbmi,const MB_MODE_INFO_EXT * mbmi_ext,const int (* const drl_mode_cost0)[2],int8_t ref_frame_type)1802 static INLINE int get_drl_cost(const MB_MODE_INFO *mbmi,
1803 const MB_MODE_INFO_EXT *mbmi_ext,
1804 const int (*const drl_mode_cost0)[2],
1805 int8_t ref_frame_type) {
1806 int cost = 0;
1807 if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
1808 for (int idx = 0; idx < 2; ++idx) {
1809 if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1810 uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1811 cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
1812 if (mbmi->ref_mv_idx == idx) return cost;
1813 }
1814 }
1815 return cost;
1816 }
1817
1818 if (have_nearmv_in_inter_mode(mbmi->mode)) {
1819 for (int idx = 1; idx < 3; ++idx) {
1820 if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1821 uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1822 cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
1823 if (mbmi->ref_mv_idx == (idx - 1)) return cost;
1824 }
1825 }
1826 return cost;
1827 }
1828 return cost;
1829 }
1830
is_single_newmv_valid(const HandleInterModeArgs * const args,const MB_MODE_INFO * const mbmi,PREDICTION_MODE this_mode)1831 static INLINE int is_single_newmv_valid(const HandleInterModeArgs *const args,
1832 const MB_MODE_INFO *const mbmi,
1833 PREDICTION_MODE this_mode) {
1834 for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
1835 const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1836 const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
1837 if (single_mode == NEWMV &&
1838 args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
1839 return 0;
1840 }
1841 }
1842 return 1;
1843 }
1844
// Number of ref-mv (DRL) indices worth searching for this mode: 1 when the
// stack is too small to code a DRL index, otherwise capped by
// MAX_REF_MV_SEARCH.
static int get_drl_refmv_count(const MACROBLOCK *const x,
                               const MV_REFERENCE_FRAME *ref_frame,
                               PREDICTION_MODE mode) {
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
  const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  const int only_newmv = (mode == NEWMV || mode == NEW_NEWMV);
  // NEAR-type modes need > 2 candidates to have a choice; pure NEWMV needs
  // more than 1.
  const int has_drl =
      (has_nearmv && ref_mv_count > 2) || (only_newmv && ref_mv_count > 1);
  if (!has_drl) return 1;
  return AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv);
}
1860
1861 // Checks if particular ref_mv_idx should be pruned.
prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,const int qindex,const int ref_mv_idx)1862 static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
1863 const int qindex,
1864 const int ref_mv_idx) {
1865 if (reduce_inter_modes >= 3) return 1;
1866 // Q-index logic based pruning is enabled only for
1867 // reduce_inter_modes = 2.
1868 assert(reduce_inter_modes == 2);
1869 // When reduce_inter_modes=2, pruning happens as below based on q index.
1870 // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
1871 // For q index range between 86 and 170: prune if ref_mv_idx == 2.
1872 // For q index range between 171 and 255: no pruning.
1873 const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
1874 return (ref_mv_idx >= min_prune_ref_mv_idx);
1875 }
1876
1877 // Whether this reference motion vector can be skipped, based on initial
1878 // heuristics.
ref_mv_idx_early_breakout(const SPEED_FEATURES * const sf,const RefFrameDistanceInfo * const ref_frame_dist_info,MACROBLOCK * x,const HandleInterModeArgs * const args,int64_t ref_best_rd,int ref_mv_idx)1879 static bool ref_mv_idx_early_breakout(
1880 const SPEED_FEATURES *const sf,
1881 const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
1882 const HandleInterModeArgs *const args, int64_t ref_best_rd,
1883 int ref_mv_idx) {
1884 MACROBLOCKD *xd = &x->e_mbd;
1885 MB_MODE_INFO *mbmi = xd->mi[0];
1886 const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1887 const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1888 const int is_comp_pred = has_second_ref(mbmi);
1889 if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
1890 if (mbmi->ref_frame[0] == LAST2_FRAME ||
1891 mbmi->ref_frame[0] == LAST3_FRAME ||
1892 mbmi->ref_frame[1] == LAST2_FRAME ||
1893 mbmi->ref_frame[1] == LAST3_FRAME) {
1894 const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
1895 if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
1896 REF_CAT_LEVEL) {
1897 return true;
1898 }
1899 }
1900 // TODO(any): Experiment with reduce_inter_modes for compound prediction
1901 if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
1902 have_newmv_in_inter_mode(mbmi->mode)) {
1903 if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
1904 mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
1905 const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
1906 const int do_prune = prune_ref_mv_idx_using_qindex(
1907 sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
1908 if (do_prune &&
1909 (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
1910 REF_CAT_LEVEL)) {
1911 return true;
1912 }
1913 }
1914 }
1915 }
1916
1917 mbmi->ref_mv_idx = ref_mv_idx;
1918 if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
1919 return true;
1920 }
1921 size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
1922 const int drl_cost = get_drl_cost(
1923 mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
1924 est_rd_rate += drl_cost;
1925 if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
1926 mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
1927 return true;
1928 }
1929 return false;
1930 }
1931
1932 // Compute the estimated RD cost for the motion vector with simple translation.
simple_translation_pred_rd(AV1_COMP * const cpi,MACROBLOCK * x,RD_STATS * rd_stats,HandleInterModeArgs * args,int ref_mv_idx,int64_t ref_best_rd,BLOCK_SIZE bsize)1933 static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
1934 RD_STATS *rd_stats,
1935 HandleInterModeArgs *args,
1936 int ref_mv_idx, int64_t ref_best_rd,
1937 BLOCK_SIZE bsize) {
1938 MACROBLOCKD *xd = &x->e_mbd;
1939 MB_MODE_INFO *mbmi = xd->mi[0];
1940 MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1941 const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1942 const AV1_COMMON *cm = &cpi->common;
1943 const int is_comp_pred = has_second_ref(mbmi);
1944 const ModeCosts *mode_costs = &x->mode_costs;
1945
1946 struct macroblockd_plane *p = xd->plane;
1947 const BUFFER_SET orig_dst = {
1948 { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
1949 { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
1950 };
1951 av1_init_rd_stats(rd_stats);
1952
1953 mbmi->interinter_comp.type = COMPOUND_AVERAGE;
1954 mbmi->comp_group_idx = 0;
1955 mbmi->compound_idx = 1;
1956 if (mbmi->ref_frame[1] == INTRA_FRAME) {
1957 mbmi->ref_frame[1] = NONE_FRAME;
1958 }
1959 int16_t mode_ctx =
1960 av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
1961
1962 mbmi->num_proj_ref = 0;
1963 mbmi->motion_mode = SIMPLE_TRANSLATION;
1964 mbmi->ref_mv_idx = ref_mv_idx;
1965
1966 rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
1967 const int drl_cost =
1968 get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
1969 rd_stats->rate += drl_cost;
1970
1971 int_mv cur_mv[2];
1972 if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
1973 return INT64_MAX;
1974 }
1975 assert(have_nearmv_in_inter_mode(mbmi->mode));
1976 for (int i = 0; i < is_comp_pred + 1; ++i) {
1977 mbmi->mv[i].as_int = cur_mv[i].as_int;
1978 }
1979 const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
1980 rd_stats->rate += ref_mv_cost;
1981
1982 if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd) {
1983 return INT64_MAX;
1984 }
1985
1986 mbmi->motion_mode = SIMPLE_TRANSLATION;
1987 mbmi->num_proj_ref = 0;
1988 if (is_comp_pred) {
1989 // Only compound_average
1990 mbmi->interinter_comp.type = COMPOUND_AVERAGE;
1991 mbmi->comp_group_idx = 0;
1992 mbmi->compound_idx = 1;
1993 }
1994 set_default_interp_filters(mbmi, cm->features.interp_filter);
1995
1996 const int mi_row = xd->mi_row;
1997 const int mi_col = xd->mi_col;
1998 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
1999 AOM_PLANE_Y, AOM_PLANE_Y);
2000 int est_rate;
2001 int64_t est_dist;
2002 model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
2003 NULL, NULL, NULL, NULL, NULL);
2004 return RDCOST(x->rdmult, rd_stats->rate + est_rate, est_dist);
2005 }
2006
// Represents a set of integers, from 0 to sizeof(int) * 8 - 1, as bits in
// an integer. 0 for the i-th bit means that integer is excluded, 1 means
// it is included.
// Adds `index` to the bit-set stored in *mask.
static inline void mask_set_bit(int *mask, int index) {
  *mask |= 1 << index;
}
2011
// Tests whether `index` is a member of the bit-set `mask`.
static inline bool mask_check_bit(int mask, int index) {
  return (mask & (1 << index)) != 0;
}
2015
// Before performing the full MV search in handle_inter_mode, do a simple
// translation search and see if we can eliminate any motion vectors.
// Returns an integer where, if the i-th bit is set, it means that the i-th
// motion vector should be searched. This is only set for NEAR_MV.
static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
                                RD_STATS *rd_stats,
                                HandleInterModeArgs *const args,
                                int64_t ref_best_rd, BLOCK_SIZE bsize,
                                const int ref_set) {
  AV1_COMMON *const cm = &cpi->common;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const PREDICTION_MODE this_mode = mbmi->mode;

  // Only search indices if they have some chance of being good.
  int good_indices = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args,
                                  ref_best_rd, i)) {
      continue;
    }
    mask_set_bit(&good_indices, i);
  }

  // Only prune in NEARMV mode, if the speed feature is set, and the block size
  // is large enough. If these conditions are not met, return all good indices
  // found so far.
  if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
    return good_indices;
  if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
  if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
  // Do not prune when there is internal resizing. TODO(elliottk) fix this
  // so b/2384 can be resolved.
  if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
      (mbmi->ref_frame[1] > 0 &&
       av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
    return good_indices;
  }

  // Calculate the RD cost for the motion vectors using simple translation.
  // Entries stay INT64_MAX for indices already pruned above.
  int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    // If this index is bad, ignore it.
    if (!mask_check_bit(good_indices, ref_mv_idx)) {
      continue;
    }
    idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
        cpi, x, rd_stats, args, ref_mv_idx, ref_best_rd, bsize);
  }
  // Find the index with the best RD cost.
  int best_idx = 0;
  for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) {
    if (idx_rdcost[i] < idx_rdcost[best_idx]) {
      best_idx = i;
    }
  }
  // Only include indices that are good and within a % of the best.
  // Compound prediction tolerates a wider margin than single prediction.
  const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
  // If the simple translation cost is not within this multiple of the
  // best RD, skip it. Note that the cutoff is derived experimentally.
  const double ref_dth = 5;
  int result = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (mask_check_bit(good_indices, i) &&
        (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth &&
        (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) {
      mask_set_bit(&result, i);
    }
  }
  return result;
}
2087
/*!\brief Motion mode information for inter mode search speedup.
 *
 * Used in a speed feature to search motion modes other than
 * SIMPLE_TRANSLATION only on winning candidates.
 */
typedef struct motion_mode_candidate {
  /*!
   * Mode info for the motion mode candidate.
   */
  MB_MODE_INFO mbmi;
  /*!
   * Rate describing the cost of the motion vectors for this candidate.
   */
  int rate_mv;
  /*!
   * Rate before motion mode search and transform coding is applied.
   */
  int rate2_nocoeff;
  /*!
   * An integer value 0 or 1 which indicates whether or not to skip the motion
   * mode search and default to SIMPLE_TRANSLATION as a speed feature for this
   * candidate.
   */
  int skip_motion_mode;
  /*!
   * Total RD cost (as computed by RDCOST) for this candidate.
   */
  int64_t rd_cost;
} motion_mode_candidate;
2117
/*!\cond */
// List of the best SIMPLE_TRANSLATION candidates, collected so that the
// motion mode search can be deferred to run on winners only.
typedef struct motion_mode_best_st_candidate {
  // Candidate array; at most MAX_WINNER_MOTION_MODES entries are valid.
  motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
  // Number of valid entries in motion_mode_cand.
  int num_motion_mode_cand;
} motion_mode_best_st_candidate;
2123
2124 // Checks if the current reference frame matches with neighbouring block's
2125 // (top/left) reference frames
ref_match_found_in_nb_blocks(MB_MODE_INFO * cur_mbmi,MB_MODE_INFO * nb_mbmi)2126 static AOM_INLINE int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
2127 MB_MODE_INFO *nb_mbmi) {
2128 MV_REFERENCE_FRAME nb_ref_frames[2] = { nb_mbmi->ref_frame[0],
2129 nb_mbmi->ref_frame[1] };
2130 MV_REFERENCE_FRAME cur_ref_frames[2] = { cur_mbmi->ref_frame[0],
2131 cur_mbmi->ref_frame[1] };
2132 const int is_cur_comp_pred = has_second_ref(cur_mbmi);
2133 int match_found = 0;
2134
2135 for (int i = 0; i < (is_cur_comp_pred + 1); i++) {
2136 if ((cur_ref_frames[i] == nb_ref_frames[0]) ||
2137 (cur_ref_frames[i] == nb_ref_frames[1]))
2138 match_found = 1;
2139 }
2140 return match_found;
2141 }
2142
find_ref_match_in_above_nbs(const int total_mi_cols,MACROBLOCKD * xd)2143 static AOM_INLINE int find_ref_match_in_above_nbs(const int total_mi_cols,
2144 MACROBLOCKD *xd) {
2145 if (!xd->up_available) return 1;
2146 const int mi_col = xd->mi_col;
2147 MB_MODE_INFO **cur_mbmi = xd->mi;
2148 // prev_row_mi points into the mi array, starting at the beginning of the
2149 // previous row.
2150 MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
2151 const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
2152 uint8_t mi_step;
2153 for (int above_mi_col = mi_col; above_mi_col < end_col;
2154 above_mi_col += mi_step) {
2155 MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
2156 mi_step = mi_size_wide[above_mi[0]->bsize];
2157 int match_found = 0;
2158 if (is_inter_block(*above_mi))
2159 match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi);
2160 if (match_found) return 1;
2161 }
2162 return 0;
2163 }
2164
find_ref_match_in_left_nbs(const int total_mi_rows,MACROBLOCKD * xd)2165 static AOM_INLINE int find_ref_match_in_left_nbs(const int total_mi_rows,
2166 MACROBLOCKD *xd) {
2167 if (!xd->left_available) return 1;
2168 const int mi_row = xd->mi_row;
2169 MB_MODE_INFO **cur_mbmi = xd->mi;
2170 // prev_col_mi points into the mi array, starting at the top of the
2171 // previous column
2172 MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
2173 const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
2174 uint8_t mi_step;
2175 for (int left_mi_row = mi_row; left_mi_row < end_row;
2176 left_mi_row += mi_step) {
2177 MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
2178 mi_step = mi_size_high[left_mi[0]->bsize];
2179 int match_found = 0;
2180 if (is_inter_block(*left_mi))
2181 match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi);
2182 if (match_found) return 1;
2183 }
2184 return 0;
2185 }
2186 /*!\endcond */
2187
/*! \brief Struct used to hold TPL data to
 * narrow down parts of the inter mode search.
 */
typedef struct {
  /*!
   * The best inter cost out of all of the reference frames.
   */
  int64_t best_inter_cost;
  /*!
   * The inter cost for each reference frame, indexed by (ref_frame - 1),
   * i.e. LAST_FRAME maps to index 0 (see prune_modes_based_on_tpl_stats).
   */
  int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
} PruneInfoFromTpl;
2201
#if !CONFIG_REALTIME_ONLY
// TODO(Remya): Check if get_tpl_stats_b() can be reused
// Accumulates the TPL per-reference prediction errors over every TPL unit
// covered by this block into inter_cost_info_from_tpl, then records the
// smallest such cost among valid references as best_inter_cost.
// No-op when TPL stats are not ready for the current GF-group index.
static AOM_INLINE void get_block_level_tpl_stats(
    AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
    PruneInfoFromTpl *inter_cost_info_from_tpl) {
  AV1_COMMON *const cm = &cpi->common;

  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int tpl_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
  const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];
  const int tpl_stride = tpl_frame->stride;
  // TPL stats are stored at a coarser granularity than mi units.
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
  // Columns must be mapped into superres (upscaled) coordinates, since TPL
  // stats are computed in that space; rows are unaffected by superres.
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);

  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
       row += row_step) {
    for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
         col += col_step_sr) {
      const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
          row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];

      // Sums up the inter cost of corresponding ref frames
      for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
            this_stats->pred_error[ref_idx];
      }
    }
  }

  // Computes the best inter cost (minimum inter_cost)
  int64_t best_inter_cost = INT64_MAX;
  for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
    const int64_t cur_inter_cost =
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
    // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
    // calculating the minimum inter_cost
    if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
        valid_refs[ref_idx])
      best_inter_cost = cur_inter_cost;
  }
  inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
}
#endif
2258
prune_modes_based_on_tpl_stats(PruneInfoFromTpl * inter_cost_info_from_tpl,const int * refs,int ref_mv_idx,const PREDICTION_MODE this_mode,int prune_mode_level)2259 static AOM_INLINE int prune_modes_based_on_tpl_stats(
2260 PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
2261 const PREDICTION_MODE this_mode, int prune_mode_level) {
2262 const int have_newmv = have_newmv_in_inter_mode(this_mode);
2263 if ((prune_mode_level < 2) && have_newmv) return 0;
2264
2265 const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
2266 if (best_inter_cost == INT64_MAX) return 0;
2267
2268 const int prune_level = prune_mode_level - 1;
2269 int64_t cur_inter_cost;
2270
2271 const int is_globalmv =
2272 (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
2273 const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;
2274
2275 // Thresholds used for pruning:
2276 // Lower value indicates aggressive pruning and higher value indicates
2277 // conservative pruning which is set based on ref_mv_idx and speed feature.
2278 // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2. prune_index
2279 // 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV
2280 static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
2281 { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
2282 };
2283
2284 const int is_comp_pred = (refs[1] > INTRA_FRAME);
2285 if (!is_comp_pred) {
2286 cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2287 } else {
2288 const int64_t inter_cost_ref0 =
2289 inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2290 const int64_t inter_cost_ref1 =
2291 inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
2292 // Choose maximum inter_cost among inter_cost_ref0 and inter_cost_ref1 for
2293 // more aggressive pruning
2294 cur_inter_cost = AOMMAX(inter_cost_ref0, inter_cost_ref1);
2295 }
2296
2297 // Prune the mode if cur_inter_cost is greater than threshold times
2298 // best_inter_cost
2299 if (cur_inter_cost >
2300 ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
2301 best_inter_cost) >>
2302 2))
2303 return 1;
2304 return 0;
2305 }
2306
2307 /*!\brief High level function to select parameters for compound mode.
2308 *
2309 * \ingroup inter_mode_search
2310 * The main search functionality is done in the call to av1_compound_type_rd().
2311 *
2312 * \param[in] cpi Top-level encoder structure.
2313 * \param[in] x Pointer to struct holding all the data for
2314 * the current macroblock.
2315 * \param[in] args HandleInterModeArgs struct holding
2316 * miscellaneous arguments for inter mode
2317 * search. See the documentation for this
2318 * struct for a description of each member.
2319 * \param[in] ref_best_rd Best RD found so far for this block.
2320 * It is used for early termination of this
2321 * search if the RD exceeds this value.
2322 * \param[in,out] cur_mv Current motion vector.
2323 * \param[in] bsize Current block size.
 * \param[in,out] compmode_interinter_cost RD of the selected interinter
 *                                         compound mode.
2326 * \param[in,out] rd_buffers CompoundTypeRdBuffers struct to hold all
2327 * allocated buffers for the compound
2328 * predictors and masks in the compound type
2329 * search.
2330 * \param[in,out] orig_dst A prediction buffer to hold a computed
2331 * prediction. This will eventually hold the
2332 * final prediction, and the tmp_dst info will
2333 * be copied here.
2334 * \param[in] tmp_dst A temporary prediction buffer to hold a
2335 * computed prediction.
2336 * \param[in,out] rate_mv The rate associated with the motion vectors.
2337 * This will be modified if a motion search is
2338 * done in the motion mode search.
2339 * \param[in,out] rd_stats Struct to keep track of the overall RD
2340 * information.
2341 * \param[in,out] skip_rd An array of length 2 where skip_rd[0] is the
2342 * best total RD for a skip mode so far, and
2343 * skip_rd[1] is the best RD for a skip mode so
2344 * far in luma. This is used as a speed feature
2345 * to skip the transform search if the computed
2346 * skip RD for the current mode is not better
2347 * than the best skip_rd so far.
2348 * \param[in,out] skip_build_pred Indicates whether or not to build the inter
2349 * predictor. If this is 0, the inter predictor
2350 * has already been built and thus we can avoid
2351 * repeating computation.
2352 * \return Returns 1 if this mode is worse than one already seen and 0 if it is
2353 * a viable candidate.
2354 */
static int process_compound_inter_mode(
    AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
    int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
    RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const AV1_COMMON *cm = &cpi->common;
  // Masked (wedge/diffwtd) compounds are only searched when both the block
  // size allows them and the sequence header enables them.
  const int masked_compound_used = is_any_masked_compound_used(bsize) &&
                                   cm->seq_params->enable_masked_compound;
  int mode_search_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) |
                         (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);

  const int num_planes = av1_num_planes(cm);
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  int is_luma_interp_done = 0;
  set_default_interp_filters(mbmi, cm->features.interp_filter);

  int64_t best_rd_compound;
  int64_t rd_thresh;
  const int comp_type_rd_shift = COMP_TYPE_RD_THRESH_SHIFT;
  const int comp_type_rd_scale = COMP_TYPE_RD_THRESH_SCALE;
  rd_thresh = get_rd_thresh_from_best_rd(ref_best_rd, (1 << comp_type_rd_shift),
                                         comp_type_rd_scale);
  // Select compound type and any parameters related to that type
  // (for example, the mask parameters if it is a masked mode) and compute
  // the RD
  *compmode_interinter_cost = av1_compound_type_rd(
      cpi, x, args, bsize, cur_mv, mode_search_mask, masked_compound_used,
      orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
      ref_best_rd, skip_rd[1], &is_luma_interp_done, rd_thresh);
  // Early exit: even the best compound RD found is worse (after scaling) than
  // the best RD seen so far for this block.
  if (ref_best_rd < INT64_MAX &&
      (best_rd_compound >> comp_type_rd_shift) * comp_type_rd_scale >
          ref_best_rd) {
    restore_dst_buf(xd, *orig_dst, num_planes);
    return 1;
  }

  // Build only uv predictor for COMPOUND_AVERAGE.
  // Note there is no need to call av1_enc_build_inter_predictor
  // for luma if COMPOUND_AVERAGE is selected because it is the first
  // candidate in av1_compound_type_rd, which means it used the dst_buf
  // rather than the tmp_buf.
  if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && is_luma_interp_done) {
    if (num_planes > 1) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                    AOM_PLANE_U, num_planes - 1);
    }
    *skip_build_pred = 1;
  }
  return 0;
}
2409
2410 // Speed feature to prune out MVs that are similar to previous MVs if they
2411 // don't achieve the best RD advantage.
prune_ref_mv_idx_search(int ref_mv_idx,int best_ref_mv_idx,int_mv save_mv[MAX_REF_MV_SEARCH-1][2],MB_MODE_INFO * mbmi,int pruning_factor)2412 static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
2413 int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
2414 MB_MODE_INFO *mbmi, int pruning_factor) {
2415 int i;
2416 const int is_comp_pred = has_second_ref(mbmi);
2417 const int thr = (1 + is_comp_pred) << (pruning_factor + 1);
2418
2419 // Skip the evaluation if an MV match is found.
2420 if (ref_mv_idx > 0) {
2421 for (int idx = 0; idx < ref_mv_idx; ++idx) {
2422 if (save_mv[idx][0].as_int == INVALID_MV) continue;
2423
2424 int mv_diff = 0;
2425 for (i = 0; i < 1 + is_comp_pred; ++i) {
2426 mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
2427 abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
2428 }
2429
2430 // If this mode is not the best one, and current MV is similar to
2431 // previous stored MV, terminate this ref_mv_idx evaluation.
2432 if (best_ref_mv_idx == -1 && mv_diff <= thr) return 1;
2433 }
2434 }
2435
2436 if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
2437 for (i = 0; i < is_comp_pred + 1; ++i)
2438 save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
2439 }
2440
2441 return 0;
2442 }
2443
2444 /*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
2445 *
2446 * \ingroup inter_mode_search
2447 *
2448 * Compares the sse of zero mv and the best sse found in single new_mv. If the
2449 * sse of the zero_mv is higher, returns 1 to signal zero_mv can be skipped.
2450 * Else returns 0.
2451 *
2452 * Note that the sse of here comes from single_motion_search. So it is
2453 * interpolated with the filter in motion search, not the actual interpolation
2454 * filter used in encoding.
2455 *
2456 * \param[in] fn_ptr A table of function pointers to compute SSE.
2457 * \param[in] x Pointer to struct holding all the data for
2458 * the current macroblock.
2459 * \param[in] bsize The current block_size.
2460 * \param[in] args The args to handle_inter_mode, used to track
2461 * the best SSE.
2462 * \return Returns 1 if zero_mv is pruned, 0 otherwise.
2463 */
prune_zero_mv_with_sse(const aom_variance_fn_ptr_t * fn_ptr,const MACROBLOCK * x,BLOCK_SIZE bsize,const HandleInterModeArgs * args)2464 static AOM_INLINE int prune_zero_mv_with_sse(
2465 const aom_variance_fn_ptr_t *fn_ptr, const MACROBLOCK *x, BLOCK_SIZE bsize,
2466 const HandleInterModeArgs *args) {
2467 const MACROBLOCKD *xd = &x->e_mbd;
2468 const MB_MODE_INFO *mbmi = xd->mi[0];
2469
2470 const int is_comp_pred = has_second_ref(mbmi);
2471 const MV_REFERENCE_FRAME *refs = mbmi->ref_frame;
2472
2473 // Check that the global mv is the same as ZEROMV
2474 assert(mbmi->mv[0].as_int == 0);
2475 assert(IMPLIES(is_comp_pred, mbmi->mv[0].as_int == 0));
2476 assert(xd->global_motion[refs[0]].wmtype == TRANSLATION ||
2477 xd->global_motion[refs[0]].wmtype == IDENTITY);
2478
2479 // Don't prune if we have invalid data
2480 for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2481 assert(mbmi->mv[0].as_int == 0);
2482 if (args->best_single_sse_in_refs[refs[idx]] == INT32_MAX) {
2483 return 0;
2484 }
2485 }
2486
2487 // Sum up the sse of ZEROMV and best NEWMV
2488 unsigned int this_sse_sum = 0;
2489 unsigned int best_sse_sum = 0;
2490 for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2491 const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
2492 const struct macroblockd_plane *pd = xd->plane;
2493 const struct buf_2d *src_buf = &p->src;
2494 const struct buf_2d *ref_buf = &pd->pre[idx];
2495 const uint8_t *src = src_buf->buf;
2496 const uint8_t *ref = ref_buf->buf;
2497 const int src_stride = src_buf->stride;
2498 const int ref_stride = ref_buf->stride;
2499
2500 unsigned int this_sse;
2501 fn_ptr[bsize].vf(ref, ref_stride, src, src_stride, &this_sse);
2502 this_sse_sum += this_sse;
2503
2504 const unsigned int best_sse = args->best_single_sse_in_refs[refs[idx]];
2505 best_sse_sum += best_sse;
2506 }
2507 if (this_sse_sum > best_sse_sum) {
2508 return 1;
2509 }
2510
2511 return 0;
2512 }
2513
2514 /*!\brief AV1 inter mode RD computation
2515 *
2516 * \ingroup inter_mode_search
2517 * Do the RD search for a given inter mode and compute all information relevant
2518 * to the input mode. It will compute the best MV,
2519 * compound parameters (if the mode is a compound mode) and interpolation filter
2520 * parameters.
2521 *
2522 * \param[in] cpi Top-level encoder structure.
2523 * \param[in] tile_data Pointer to struct holding adaptive
2524 * data/contexts/models for the tile during
2525 * encoding.
2526 * \param[in] x Pointer to structure holding all the data
2527 * for the current macroblock.
2528 * \param[in] bsize Current block size.
2529 * \param[in,out] rd_stats Struct to keep track of the overall RD
2530 * information.
2531 * \param[in,out] rd_stats_y Struct to keep track of the RD information
2532 * for only the Y plane.
2533 * \param[in,out] rd_stats_uv Struct to keep track of the RD information
2534 * for only the UV planes.
2535 * \param[in] args HandleInterModeArgs struct holding
2536 * miscellaneous arguments for inter mode
2537 * search. See the documentation for this
2538 * struct for a description of each member.
2539 * \param[in] ref_best_rd Best RD found so far for this block.
2540 * It is used for early termination of this
2541 * search if the RD exceeds this value.
2542 * \param[in] tmp_buf Temporary buffer used to hold predictors
2543 * built in this search.
2544 * \param[in,out] rd_buffers CompoundTypeRdBuffers struct to hold all
2545 * allocated buffers for the compound
2546 * predictors and masks in the compound type
2547 * search.
2548 * \param[in,out] best_est_rd Estimated RD for motion mode search if
2549 * do_tx_search (see below) is 0.
2550 * \param[in] do_tx_search Parameter to indicate whether or not to do
2551 * a full transform search. This will compute
2552 * an estimated RD for the modes without the
2553 * transform search and later perform the full
2554 * transform search on the best candidates.
2555 * \param[in,out] inter_modes_info InterModesInfo struct to hold inter mode
2556 * information to perform a full transform
2557 * search only on winning candidates searched
2558 * with an estimate for transform coding RD.
2559 * \param[in,out] motion_mode_cand A motion_mode_candidate struct to store
2560 * motion mode information used in a speed
2561 * feature to search motion modes other than
2562 * SIMPLE_TRANSLATION only on winning
2563 * candidates.
2564 * \param[in,out] skip_rd A length 2 array, where skip_rd[0] is the
2565 * best total RD for a skip mode so far, and
2566 * skip_rd[1] is the best RD for a skip mode so
2567 * far in luma. This is used as a speed feature
2568 * to skip the transform search if the computed
2569 * skip RD for the current mode is not better
2570 * than the best skip_rd so far.
2571 * \param[in] inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
2572 * narrow down the search based on data
2573 * collected in the TPL model.
2574 * \param[out] yrd Stores the rdcost corresponding to encoding
2575 * the luma plane.
2576 *
2577 * \return The RD cost for the mode being searched.
2578 */
handle_inter_mode(AV1_COMP * const cpi,TileDataEnc * tile_data,MACROBLOCK * x,BLOCK_SIZE bsize,RD_STATS * rd_stats,RD_STATS * rd_stats_y,RD_STATS * rd_stats_uv,HandleInterModeArgs * args,int64_t ref_best_rd,uint8_t * const tmp_buf,const CompoundTypeRdBuffers * rd_buffers,int64_t * best_est_rd,const int do_tx_search,InterModesInfo * inter_modes_info,motion_mode_candidate * motion_mode_cand,int64_t * skip_rd,PruneInfoFromTpl * inter_cost_info_from_tpl,int64_t * yrd)2579 static int64_t handle_inter_mode(
2580 AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
2581 BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
2582 RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
2583 uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
2584 int64_t *best_est_rd, const int do_tx_search,
2585 InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
2586 int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
2587 int64_t *yrd) {
2588 const AV1_COMMON *cm = &cpi->common;
2589 const int num_planes = av1_num_planes(cm);
2590 MACROBLOCKD *xd = &x->e_mbd;
2591 MB_MODE_INFO *mbmi = xd->mi[0];
2592 MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2593 TxfmSearchInfo *txfm_info = &x->txfm_search_info;
2594 const int is_comp_pred = has_second_ref(mbmi);
2595 const PREDICTION_MODE this_mode = mbmi->mode;
2596
2597 #if CONFIG_REALTIME_ONLY
2598 const int prune_modes_based_on_tpl = 0;
2599 #else // CONFIG_REALTIME_ONLY
2600 const TplParams *const tpl_data = &cpi->ppi->tpl_data;
2601 const int prune_modes_based_on_tpl =
2602 cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
2603 av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index);
2604 #endif // CONFIG_REALTIME_ONLY
2605 int i;
2606 // Reference frames for this mode
2607 const int refs[2] = { mbmi->ref_frame[0],
2608 (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2609 int rate_mv = 0;
2610 int64_t rd = INT64_MAX;
2611 // Do first prediction into the destination buffer. Do the next
2612 // prediction into a temporary buffer. Then keep track of which one
2613 // of these currently holds the best predictor, and use the other
2614 // one for future predictions. In the end, copy from tmp_buf to
2615 // dst if necessary.
2616 struct macroblockd_plane *pd = xd->plane;
2617 const BUFFER_SET orig_dst = {
2618 { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
2619 { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
2620 };
2621 const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
2622 tmp_buf + 2 * MAX_SB_SQUARE },
2623 { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
2624
2625 int64_t ret_val = INT64_MAX;
2626 const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2627 RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
2628 int64_t best_rd = INT64_MAX;
2629 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
2630 uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
2631 int64_t best_yrd = INT64_MAX;
2632 MB_MODE_INFO best_mbmi = *mbmi;
2633 int best_xskip_txfm = 0;
2634 int64_t newmv_ret_val = INT64_MAX;
2635 inter_mode_info mode_info[MAX_REF_MV_SEARCH];
2636
2637 // Do not prune the mode based on inter cost from tpl if the current ref frame
2638 // is the winner ref in neighbouring blocks.
2639 int ref_match_found_in_above_nb = 0;
2640 int ref_match_found_in_left_nb = 0;
2641 if (prune_modes_based_on_tpl) {
2642 ref_match_found_in_above_nb =
2643 find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
2644 ref_match_found_in_left_nb =
2645 find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
2646 }
2647
2648 // First, perform a simple translation search for each of the indices. If
2649 // an index performs well, it will be fully searched in the main loop
2650 // of this function.
2651 const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
2652 // Save MV results from first 2 ref_mv_idx.
2653 int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
2654 int best_ref_mv_idx = -1;
2655 const int idx_mask =
2656 ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd, bsize, ref_set);
2657 const int16_t mode_ctx =
2658 av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
2659 const ModeCosts *mode_costs = &x->mode_costs;
2660 const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
2661 const int base_rate =
2662 args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;
2663
2664 for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
2665 save_mv[i][0].as_int = INVALID_MV;
2666 save_mv[i][1].as_int = INVALID_MV;
2667 }
2668
2669 // Main loop of this function. This will iterate over all of the ref mvs
2670 // in the dynamic reference list and do the following:
2671 // 1.) Get the current MV. Create newmv MV if necessary
2672 // 2.) Search compound type and parameters if applicable
2673 // 3.) Do interpolation filter search
2674 // 4.) Build the inter predictor
2675 // 5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
2676 // WARPED_CAUSAL)
2677 // 6.) Update stats if best so far
2678 for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
2679 mbmi->ref_mv_idx = ref_mv_idx;
2680
2681 mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
2682 mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX;
2683 const int drl_cost = get_drl_cost(
2684 mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
2685 mode_info[ref_mv_idx].drl_cost = drl_cost;
2686 mode_info[ref_mv_idx].skip = 0;
2687
2688 if (!mask_check_bit(idx_mask, ref_mv_idx)) {
2689 // MV did not perform well in simple translation search. Skip it.
2690 continue;
2691 }
2692 if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
2693 !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
2694 // Skip mode if TPL model indicates it will not be beneficial.
2695 if (prune_modes_based_on_tpl_stats(
2696 inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
2697 cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
2698 continue;
2699 }
2700 av1_init_rd_stats(rd_stats);
2701
2702 // Initialize compound mode data
2703 mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2704 mbmi->comp_group_idx = 0;
2705 mbmi->compound_idx = 1;
2706 if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;
2707
2708 mbmi->num_proj_ref = 0;
2709 mbmi->motion_mode = SIMPLE_TRANSLATION;
2710
2711 // Compute cost for signalling this DRL index
2712 rd_stats->rate = base_rate;
2713 rd_stats->rate += drl_cost;
2714
2715 int rs = 0;
2716 int compmode_interinter_cost = 0;
2717
2718 int_mv cur_mv[2];
2719
2720 // TODO(Cherma): Extend this speed feature to support compound mode
2721 int skip_repeated_ref_mv =
2722 is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
2723 // Generate the current mv according to the prediction mode
2724 if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
2725 continue;
2726 }
2727
2728 // The above call to build_cur_mv does not handle NEWMV modes. Build
2729 // the mv here if we have NEWMV for any predictors.
2730 if (have_newmv_in_inter_mode(this_mode)) {
2731 #if CONFIG_COLLECT_COMPONENT_TIMING
2732 start_timing(cpi, handle_newmv_time);
2733 #endif
2734 newmv_ret_val =
2735 handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
2736 #if CONFIG_COLLECT_COMPONENT_TIMING
2737 end_timing(cpi, handle_newmv_time);
2738 #endif
2739
2740 if (newmv_ret_val != 0) continue;
2741
2742 if (is_inter_singleref_mode(this_mode) &&
2743 cur_mv[0].as_int != INVALID_MV) {
2744 const MV_REFERENCE_FRAME ref = refs[0];
2745 const unsigned int this_sse = x->pred_sse[ref];
2746 if (this_sse < args->best_single_sse_in_refs[ref]) {
2747 args->best_single_sse_in_refs[ref] = this_sse;
2748 }
2749 }
2750
2751 rd_stats->rate += rate_mv;
2752 }
2753 // Copy the motion vector for this mode into mbmi struct
2754 for (i = 0; i < is_comp_pred + 1; ++i) {
2755 mbmi->mv[i].as_int = cur_mv[i].as_int;
2756 }
2757
2758 if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
2759 mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
2760 continue;
2761 }
2762
2763 // Skip the rest of the search if prune_ref_mv_idx_search speed feature
2764 // is enabled, and the current MV is similar to a previous one.
2765 if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
2766 prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
2767 cpi->sf.inter_sf.prune_ref_mv_idx_search))
2768 continue;
2769
2770 if (cpi->sf.gm_sf.prune_zero_mv_with_sse &&
2771 cpi->sf.gm_sf.gm_search_type == GM_DISABLE_SEARCH &&
2772 (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
2773 if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args)) {
2774 continue;
2775 }
2776 }
2777
2778 int skip_build_pred = 0;
2779 const int mi_row = xd->mi_row;
2780 const int mi_col = xd->mi_col;
2781
2782 // Handle a compound predictor, continue if it is determined this
2783 // cannot be the best compound mode
2784 if (is_comp_pred) {
2785 #if CONFIG_COLLECT_COMPONENT_TIMING
2786 start_timing(cpi, compound_type_rd_time);
2787 #endif
2788 const int not_best_mode = process_compound_inter_mode(
2789 cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
2790 rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
2791 &skip_build_pred);
2792 #if CONFIG_COLLECT_COMPONENT_TIMING
2793 end_timing(cpi, compound_type_rd_time);
2794 #endif
2795 if (not_best_mode) continue;
2796 }
2797
2798 #if CONFIG_COLLECT_COMPONENT_TIMING
2799 start_timing(cpi, interpolation_filter_search_time);
2800 #endif
2801 // Determine the interpolation filter for this mode
2802 ret_val = av1_interpolation_filter_search(
2803 x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
2804 &skip_build_pred, args, ref_best_rd);
2805 #if CONFIG_COLLECT_COMPONENT_TIMING
2806 end_timing(cpi, interpolation_filter_search_time);
2807 #endif
2808 if (args->modelled_rd != NULL && !is_comp_pred) {
2809 args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
2810 }
2811 if (ret_val != 0) {
2812 restore_dst_buf(xd, orig_dst, num_planes);
2813 continue;
2814 } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
2815 ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
2816 restore_dst_buf(xd, orig_dst, num_planes);
2817 continue;
2818 }
2819
2820 // Compute modelled RD if enabled
2821 if (args->modelled_rd != NULL) {
2822 if (is_comp_pred) {
2823 const int mode0 = compound_ref0_mode(this_mode);
2824 const int mode1 = compound_ref1_mode(this_mode);
2825 const int64_t mrd =
2826 AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
2827 args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
2828 if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
2829 restore_dst_buf(xd, orig_dst, num_planes);
2830 continue;
2831 }
2832 }
2833 }
2834 rd_stats->rate += compmode_interinter_cost;
2835 if (skip_build_pred != 1) {
2836 // Build this inter predictor if it has not been previously built
2837 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0,
2838 av1_num_planes(cm) - 1);
2839 }
2840
2841 #if CONFIG_COLLECT_COMPONENT_TIMING
2842 start_timing(cpi, motion_mode_rd_time);
2843 #endif
2844 int rate2_nocoeff = rd_stats->rate;
2845 // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
2846 // OBMC_CAUSAL or WARPED_CAUSAL
2847 int64_t this_yrd;
2848 ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
2849 rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
2850 &orig_dst, best_est_rd, do_tx_search,
2851 inter_modes_info, 0, &this_yrd);
2852 #if CONFIG_COLLECT_COMPONENT_TIMING
2853 end_timing(cpi, motion_mode_rd_time);
2854 #endif
2855 assert(
2856 IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));
2857
2858 if (ret_val != INT64_MAX) {
2859 int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
2860 const THR_MODES mode_enum = get_prediction_mode_idx(
2861 mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
2862 // Collect mode stats for multiwinner mode processing
2863 store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
2864 rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
2865 cpi->sf.winner_mode_sf.multi_winner_mode_type,
2866 do_tx_search);
2867 if (tmp_rd < best_rd) {
2868 best_yrd = this_yrd;
2869 // Update the best rd stats if we found the best mode so far
2870 best_rd_stats = *rd_stats;
2871 best_rd_stats_y = *rd_stats_y;
2872 best_rd_stats_uv = *rd_stats_uv;
2873 best_rd = tmp_rd;
2874 best_mbmi = *mbmi;
2875 best_xskip_txfm = txfm_info->skip_txfm;
2876 memcpy(best_blk_skip, txfm_info->blk_skip,
2877 sizeof(best_blk_skip[0]) * xd->height * xd->width);
2878 av1_copy_array(best_tx_type_map, xd->tx_type_map,
2879 xd->height * xd->width);
2880 motion_mode_cand->rate_mv = rate_mv;
2881 motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
2882 }
2883
2884 if (tmp_rd < ref_best_rd) {
2885 ref_best_rd = tmp_rd;
2886 best_ref_mv_idx = ref_mv_idx;
2887 }
2888 }
2889 restore_dst_buf(xd, orig_dst, num_planes);
2890 }
2891
2892 if (best_rd == INT64_MAX) return INT64_MAX;
2893
2894 // re-instate status of the best choice
2895 *rd_stats = best_rd_stats;
2896 *rd_stats_y = best_rd_stats_y;
2897 *rd_stats_uv = best_rd_stats_uv;
2898 *yrd = best_yrd;
2899 *mbmi = best_mbmi;
2900 txfm_info->skip_txfm = best_xskip_txfm;
2901 assert(IMPLIES(mbmi->comp_group_idx == 1,
2902 mbmi->interinter_comp.type != COMPOUND_AVERAGE));
2903 memcpy(txfm_info->blk_skip, best_blk_skip,
2904 sizeof(best_blk_skip[0]) * xd->height * xd->width);
2905 av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
2906
2907 rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
2908
2909 return rd_stats->rdcost;
2910 }
2911
2912 /*!\brief Search for the best intrabc predictor
2913 *
2914 * \ingroup intra_mode_search
2915 * \callergraph
2916 * This function performs a motion search to find the best intrabc predictor.
2917 *
2918 * \returns Returns the best overall rdcost (including the non-intrabc modes
2919 * search before this function).
2920 */
rd_pick_intrabc_mode_sb(const AV1_COMP * cpi,MACROBLOCK * x,PICK_MODE_CONTEXT * ctx,RD_STATS * rd_stats,BLOCK_SIZE bsize,int64_t best_rd)2921 static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
2922 PICK_MODE_CONTEXT *ctx,
2923 RD_STATS *rd_stats, BLOCK_SIZE bsize,
2924 int64_t best_rd) {
2925 const AV1_COMMON *const cm = &cpi->common;
2926 if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc ||
2927 cpi->sf.rt_sf.use_nonrd_pick_mode)
2928 return INT64_MAX;
2929 const int num_planes = av1_num_planes(cm);
2930
2931 MACROBLOCKD *const xd = &x->e_mbd;
2932 const TileInfo *tile = &xd->tile;
2933 MB_MODE_INFO *mbmi = xd->mi[0];
2934 TxfmSearchInfo *txfm_info = &x->txfm_search_info;
2935
2936 const int mi_row = xd->mi_row;
2937 const int mi_col = xd->mi_col;
2938 const int w = block_size_wide[bsize];
2939 const int h = block_size_high[bsize];
2940 const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
2941 const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
2942
2943 MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2944 const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
2945 av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
2946 xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
2947 mbmi_ext->mode_context);
2948 // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
2949 // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
2950 av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
2951 int_mv nearestmv, nearmv;
2952 av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
2953 0);
2954
2955 if (nearestmv.as_int == INVALID_MV) {
2956 nearestmv.as_int = 0;
2957 }
2958 if (nearmv.as_int == INVALID_MV) {
2959 nearmv.as_int = 0;
2960 }
2961
2962 int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
2963 if (dv_ref.as_int == 0) {
2964 av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
2965 }
2966 // Ref DV should not have sub-pel.
2967 assert((dv_ref.as_mv.col & 7) == 0);
2968 assert((dv_ref.as_mv.row & 7) == 0);
2969 mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;
2970
2971 struct buf_2d yv12_mb[MAX_MB_PLANE];
2972 av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
2973 for (int i = 0; i < num_planes; ++i) {
2974 xd->plane[i].pre[0] = yv12_mb[i];
2975 }
2976
2977 enum IntrabcMotionDirection {
2978 IBC_MOTION_ABOVE,
2979 IBC_MOTION_LEFT,
2980 IBC_MOTION_DIRECTIONS
2981 };
2982
2983 MB_MODE_INFO best_mbmi = *mbmi;
2984 RD_STATS best_rdstats = *rd_stats;
2985 uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
2986 uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
2987 av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
2988
2989 FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
2990 const search_site_config *lookahead_search_sites =
2991 cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
2992 av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
2993 &dv_ref.as_mv, lookahead_search_sites,
2994 /*fine_search_interval=*/0);
2995 const IntraBCMVCosts *const dv_costs = x->dv_costs;
2996 av1_set_ms_to_intra_mode(&fullms_params, dv_costs);
2997
2998 for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
2999 dir < IBC_MOTION_DIRECTIONS; ++dir) {
3000 switch (dir) {
3001 case IBC_MOTION_ABOVE:
3002 fullms_params.mv_limits.col_min =
3003 (tile->mi_col_start - mi_col) * MI_SIZE;
3004 fullms_params.mv_limits.col_max =
3005 (tile->mi_col_end - mi_col) * MI_SIZE - w;
3006 fullms_params.mv_limits.row_min =
3007 (tile->mi_row_start - mi_row) * MI_SIZE;
3008 fullms_params.mv_limits.row_max =
3009 (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
3010 break;
3011 case IBC_MOTION_LEFT:
3012 fullms_params.mv_limits.col_min =
3013 (tile->mi_col_start - mi_col) * MI_SIZE;
3014 fullms_params.mv_limits.col_max =
3015 (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
3016 // TODO(aconverse@google.com): Minimize the overlap between above and
3017 // left areas.
3018 fullms_params.mv_limits.row_min =
3019 (tile->mi_row_start - mi_row) * MI_SIZE;
3020 int bottom_coded_mi_edge =
3021 AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
3022 fullms_params.mv_limits.row_max =
3023 (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
3024 break;
3025 default: assert(0);
3026 }
3027 assert(fullms_params.mv_limits.col_min >= fullms_params.mv_limits.col_min);
3028 assert(fullms_params.mv_limits.col_max <= fullms_params.mv_limits.col_max);
3029 assert(fullms_params.mv_limits.row_min >= fullms_params.mv_limits.row_min);
3030 assert(fullms_params.mv_limits.row_max <= fullms_params.mv_limits.row_max);
3031
3032 av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);
3033
3034 if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
3035 fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
3036 continue;
3037 }
3038
3039 const int step_param = cpi->mv_search_params.mv_step_param;
3040 const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
3041 IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
3042 int_mv best_mv, best_hash_mv;
3043
3044 int bestsme = av1_full_pixel_search(start_mv, &fullms_params, step_param,
3045 NULL, &best_mv.as_fullmv, NULL);
3046 const int hashsme = av1_intrabc_hash_search(
3047 cpi, xd, &fullms_params, intrabc_hash_info, &best_hash_mv.as_fullmv);
3048 if (hashsme < bestsme) {
3049 best_mv = best_hash_mv;
3050 bestsme = hashsme;
3051 }
3052
3053 if (bestsme == INT_MAX) continue;
3054 const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
3055 if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
3056 get_fullmv_from_mv(&dv)))
3057 continue;
3058 if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
3059 cm->seq_params->mib_size_log2))
3060 continue;
3061
3062 // DV should not have sub-pel.
3063 assert((dv.col & 7) == 0);
3064 assert((dv.row & 7) == 0);
3065 memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
3066 mbmi->filter_intra_mode_info.use_filter_intra = 0;
3067 mbmi->use_intrabc = 1;
3068 mbmi->mode = DC_PRED;
3069 mbmi->uv_mode = UV_DC_PRED;
3070 mbmi->motion_mode = SIMPLE_TRANSLATION;
3071 mbmi->mv[0].as_mv = dv;
3072 mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
3073 mbmi->skip_txfm = 0;
3074 av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
3075 av1_num_planes(cm) - 1);
3076
3077 // TODO(aconverse@google.com): The full motion field defining discount
3078 // in MV_COST_WEIGHT is too large. Explore other values.
3079 const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
3080 dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
3081 const int rate_mode = x->mode_costs.intrabc_cost[1];
3082 RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
3083 if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
3084 &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
3085 continue;
3086 rd_stats_yuv.rdcost =
3087 RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
3088 if (rd_stats_yuv.rdcost < best_rd) {
3089 best_rd = rd_stats_yuv.rdcost;
3090 best_mbmi = *mbmi;
3091 best_rdstats = rd_stats_yuv;
3092 memcpy(best_blk_skip, txfm_info->blk_skip,
3093 sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
3094 av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
3095 }
3096 }
3097 *mbmi = best_mbmi;
3098 *rd_stats = best_rdstats;
3099 memcpy(txfm_info->blk_skip, best_blk_skip,
3100 sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
3101 av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
3102 #if CONFIG_RD_DEBUG
3103 mbmi->rd_stats = *rd_stats;
3104 #endif
3105 return best_rd;
3106 }
3107
// TODO(chiyotsai@google.com): We are using struct $struct_name instead of
// their typedef here because Doxygen doesn't know about the typedefs yet. So
// using the typedef will prevent doxygen from finding this function and
// generating the callgraph. Once documents for AV1_COMP and MACROBLOCK are
// added to doxygen, we can revert back to using the typedefs.
av1_rd_pick_intra_mode_sb(const struct AV1_COMP * cpi,struct macroblock * x,struct RD_STATS * rd_cost,BLOCK_SIZE bsize,PICK_MODE_CONTEXT * ctx,int64_t best_rd)3113 void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
3114 struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
3115 PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
3116 const AV1_COMMON *const cm = &cpi->common;
3117 MACROBLOCKD *const xd = &x->e_mbd;
3118 MB_MODE_INFO *const mbmi = xd->mi[0];
3119 const int num_planes = av1_num_planes(cm);
3120 TxfmSearchInfo *txfm_info = &x->txfm_search_info;
3121 int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
3122 int y_skip_txfm = 0, uv_skip_txfm = 0;
3123 int64_t dist_y = 0, dist_uv = 0;
3124
3125 ctx->rd_stats.skip_txfm = 0;
3126 mbmi->ref_frame[0] = INTRA_FRAME;
3127 mbmi->ref_frame[1] = NONE_FRAME;
3128 mbmi->use_intrabc = 0;
3129 mbmi->mv[0].as_int = 0;
3130 mbmi->skip_mode = 0;
3131
3132 const int64_t intra_yrd =
3133 av1_rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
3134 &y_skip_txfm, bsize, best_rd, ctx);
3135
3136 // Initialize default mode evaluation params
3137 set_mode_eval_params(cpi, x, DEFAULT_EVAL);
3138
3139 if (intra_yrd < best_rd) {
3140 // Search intra modes for uv planes if needed
3141 if (num_planes > 1) {
3142 // Set up the tx variables for reproducing the y predictions in case we
3143 // need it for chroma-from-luma.
3144 if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
3145 memcpy(txfm_info->blk_skip, ctx->blk_skip,
3146 sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
3147 av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
3148 }
3149 const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
3150 av1_rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
3151 &dist_uv, &uv_skip_txfm, bsize,
3152 max_uv_tx_size);
3153 }
3154
3155 // Intra block is always coded as non-skip
3156 rd_cost->rate =
3157 rate_y + rate_uv +
3158 x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
3159 rd_cost->dist = dist_y + dist_uv;
3160 rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
3161 rd_cost->skip_txfm = 0;
3162 } else {
3163 rd_cost->rate = INT_MAX;
3164 }
3165
3166 if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
3167 best_rd = rd_cost->rdcost;
3168 if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
3169 ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
3170 memcpy(ctx->blk_skip, txfm_info->blk_skip,
3171 sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
3172 assert(rd_cost->rate != INT_MAX);
3173 }
3174 if (rd_cost->rate == INT_MAX) return;
3175
3176 ctx->mic = *xd->mi[0];
3177 av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
3178 av1_ref_frame_type(xd->mi[0]->ref_frame));
3179 av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
3180 }
3181
// Forward declaration; the definition appears later in this file. Computes
// the OBMC weighted-prediction target from the above/left neighbor
// predictions (used by set_params_rd_pick_inter_mode below).
static AOM_INLINE void calc_target_weighted_pred(
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
    const uint8_t *above, int above_stride, const uint8_t *left,
    int left_stride);
3186
rd_pick_skip_mode(RD_STATS * rd_cost,InterModeSearchState * search_state,const AV1_COMP * const cpi,MACROBLOCK * const x,BLOCK_SIZE bsize,struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE])3187 static AOM_INLINE void rd_pick_skip_mode(
3188 RD_STATS *rd_cost, InterModeSearchState *search_state,
3189 const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
3190 struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
3191 const AV1_COMMON *const cm = &cpi->common;
3192 const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
3193 const int num_planes = av1_num_planes(cm);
3194 MACROBLOCKD *const xd = &x->e_mbd;
3195 MB_MODE_INFO *const mbmi = xd->mi[0];
3196
3197 x->compound_idx = 1; // COMPOUND_AVERAGE
3198 RD_STATS skip_mode_rd_stats;
3199 av1_invalid_rd_stats(&skip_mode_rd_stats);
3200
3201 if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
3202 skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
3203 return;
3204 }
3205
3206 const MV_REFERENCE_FRAME ref_frame =
3207 LAST_FRAME + skip_mode_info->ref_frame_idx_0;
3208 const MV_REFERENCE_FRAME second_ref_frame =
3209 LAST_FRAME + skip_mode_info->ref_frame_idx_1;
3210 const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
3211 const THR_MODES mode_index =
3212 get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
3213
3214 if (mode_index == THR_INVALID) {
3215 return;
3216 }
3217
3218 if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
3219 cpi->sf.inter_sf.disable_onesided_comp) &&
3220 cpi->all_one_sided_refs) {
3221 return;
3222 }
3223
3224 mbmi->mode = this_mode;
3225 mbmi->uv_mode = UV_DC_PRED;
3226 mbmi->ref_frame[0] = ref_frame;
3227 mbmi->ref_frame[1] = second_ref_frame;
3228 const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3229 if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
3230 MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
3231 if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
3232 mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
3233 return;
3234 }
3235 av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
3236 xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3237 mbmi_ext->mode_context);
3238 // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3239 // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3240 av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
3241 }
3242
3243 assert(this_mode == NEAREST_NEARESTMV);
3244 if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
3245 return;
3246 }
3247
3248 mbmi->filter_intra_mode_info.use_filter_intra = 0;
3249 mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
3250 mbmi->comp_group_idx = 0;
3251 mbmi->compound_idx = x->compound_idx;
3252 mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3253 mbmi->motion_mode = SIMPLE_TRANSLATION;
3254 mbmi->ref_mv_idx = 0;
3255 mbmi->skip_mode = mbmi->skip_txfm = 1;
3256 mbmi->palette_mode_info.palette_size[0] = 0;
3257 mbmi->palette_mode_info.palette_size[1] = 0;
3258
3259 set_default_interp_filters(mbmi, cm->features.interp_filter);
3260
3261 set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3262 for (int i = 0; i < num_planes; i++) {
3263 xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3264 xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3265 }
3266
3267 BUFFER_SET orig_dst;
3268 for (int i = 0; i < num_planes; i++) {
3269 orig_dst.plane[i] = xd->plane[i].dst.buf;
3270 orig_dst.stride[i] = xd->plane[i].dst.stride;
3271 }
3272
3273 // Obtain the rdcost for skip_mode.
3274 skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst);
3275
3276 // Compare the use of skip_mode with the best intra/inter mode obtained.
3277 const int skip_mode_ctx = av1_get_skip_mode_context(xd);
3278 int64_t best_intra_inter_mode_cost = INT64_MAX;
3279 if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
3280 const ModeCosts *mode_costs = &x->mode_costs;
3281 best_intra_inter_mode_cost = RDCOST(
3282 x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
3283 rd_cost->dist);
3284 // Account for non-skip mode rate in total rd stats
3285 rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
3286 av1_rd_cost_update(x->rdmult, rd_cost);
3287 }
3288
3289 if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
3290 (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
3291 assert(mode_index != THR_INVALID);
3292 search_state->best_mbmode.skip_mode = 1;
3293 search_state->best_mbmode = *mbmi;
3294 memset(search_state->best_mbmode.inter_tx_size,
3295 search_state->best_mbmode.tx_size,
3296 sizeof(search_state->best_mbmode.inter_tx_size));
3297 set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
3298 search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
3299 xd);
3300 search_state->best_mode_index = mode_index;
3301
3302 // Update rd_cost
3303 rd_cost->rate = skip_mode_rd_stats.rate;
3304 rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
3305 rd_cost->rdcost = skip_mode_rd_stats.rdcost;
3306
3307 search_state->best_rd = rd_cost->rdcost;
3308 search_state->best_skip2 = 1;
3309 search_state->best_mode_skippable = 1;
3310
3311 x->txfm_search_info.skip_txfm = 1;
3312 }
3313 }
3314
3315 // Get winner mode stats of given mode index
get_winner_mode_stats(MACROBLOCK * x,MB_MODE_INFO * best_mbmode,RD_STATS * best_rd_cost,int best_rate_y,int best_rate_uv,THR_MODES * best_mode_index,RD_STATS ** winner_rd_cost,int * winner_rate_y,int * winner_rate_uv,THR_MODES * winner_mode_index,MULTI_WINNER_MODE_TYPE multi_winner_mode_type,int mode_idx)3316 static AOM_INLINE MB_MODE_INFO *get_winner_mode_stats(
3317 MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
3318 int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
3319 RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
3320 THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
3321 int mode_idx) {
3322 MB_MODE_INFO *winner_mbmi;
3323 if (multi_winner_mode_type) {
3324 assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
3325 WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
3326 winner_mbmi = &winner_mode_stat->mbmi;
3327
3328 *winner_rd_cost = &winner_mode_stat->rd_cost;
3329 *winner_rate_y = winner_mode_stat->rate_y;
3330 *winner_rate_uv = winner_mode_stat->rate_uv;
3331 *winner_mode_index = winner_mode_stat->mode_index;
3332 } else {
3333 winner_mbmi = best_mbmode;
3334 *winner_rd_cost = best_rd_cost;
3335 *winner_rate_y = best_rate_y;
3336 *winner_rate_uv = best_rate_uv;
3337 *winner_mode_index = *best_mode_index;
3338 }
3339 return winner_mbmi;
3340 }
3341
// speed feature: fast intra/inter transform type search
// Used for speed >= 2
// When this speed feature is on, in rd mode search, only DCT is used.
// After the mode is determined, this function is called, to select
// transform types and get accurate rdcost.
//
// For each stored winner candidate (or just the single best mode when
// multi-winner processing is off), this rebuilds the prediction, re-runs the
// transform size/type search, re-evaluates the skip-vs-code decision, and
// updates best_mbmode / rd_cost / ctx if the refined cost beats the current
// best.
static AOM_INLINE void refine_winner_mode_tx(
    const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
    PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
    MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int64_t best_rd;
  const int num_planes = av1_num_planes(cm);

  if (!is_winner_mode_processing_enabled(cpi, best_mbmode, best_mbmode->mode))
    return;

  // Set params for winner mode evaluation
  set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);

  // No best mode identified so far
  if (*best_mode_index == THR_INVALID) return;

  best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
  for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
    RD_STATS *winner_rd_stats = NULL;
    int winner_rate_y = 0, winner_rate_uv = 0;
    THR_MODES winner_mode_index = 0;

    // TODO(any): Combine best mode and multi-winner mode processing paths
    // Get winner mode stats for current mode index
    MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
        x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
        &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);

    if (xd->lossless[winner_mbmi->segment_id] == 0 &&
        winner_mode_index != THR_INVALID &&
        is_winner_mode_processing_enabled(cpi, winner_mbmi,
                                          winner_mbmi->mode)) {
      RD_STATS rd_stats = *winner_rd_stats;
      int skip_blk = 0;
      RD_STATS rd_stats_y, rd_stats_uv;
      const int skip_ctx = av1_get_skip_txfm_context(xd);

      // Re-install the candidate mode info before rebuilding its prediction.
      *mbmi = *winner_mbmi;

      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

      // Select prediction reference frames.
      for (int i = 0; i < num_planes; i++) {
        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
        if (has_second_ref(mbmi))
          xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
      }

      if (is_inter_mode(mbmi->mode)) {
        const int mi_row = xd->mi_row;
        const int mi_col = xd->mi_col;
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                      av1_num_planes(cm) - 1);
        if (mbmi->motion_mode == OBMC_CAUSAL)
          av1_build_obmc_inter_predictors_sb(cm, xd);

        av1_subtract_plane(x, bsize, 0);
        // Recursive tx partitioning for TX_MODE_SELECT (non-lossless);
        // uniform tx size otherwise.
        if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
            !xd->lossless[mbmi->segment_id]) {
          av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                              INT64_MAX);
          assert(rd_stats_y.rate != INT_MAX);
        } else {
          av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                            INT64_MAX);
          memset(mbmi->inter_tx_size, mbmi->tx_size,
                 sizeof(mbmi->inter_tx_size));
          for (int i = 0; i < xd->height * xd->width; ++i)
            set_blk_skip(txfm_info->blk_skip, 0, i, rd_stats_y.skip_txfm);
        }
      } else {
        av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                          INT64_MAX);
      }

      if (num_planes > 1) {
        av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
      } else {
        av1_init_rd_stats(&rd_stats_uv);
      }

      // Re-decide coded-vs-skipped: if signaling "skip" plus the SSE beats
      // coding the residual, take the skip path (inter modes only).
      const ModeCosts *mode_costs = &x->mode_costs;
      if (is_inter_mode(mbmi->mode) &&
          RDCOST(x->rdmult,
                 mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
                     rd_stats_uv.rate,
                 (rd_stats_y.dist + rd_stats_uv.dist)) >
              RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
                     (rd_stats_y.sse + rd_stats_uv.sse))) {
        skip_blk = 1;
        rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
        rd_stats_uv.rate = 0;
        rd_stats_y.dist = rd_stats_y.sse;
        rd_stats_uv.dist = rd_stats_uv.sse;
      } else {
        skip_blk = 0;
        rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
      }
      // Replace the candidate's original y/uv rates with the refined ones.
      int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
                      winner_rate_y - winner_rate_uv;
      int64_t this_rd =
          RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
      if (best_rd > this_rd) {
        *best_mbmode = *mbmi;
        *best_mode_index = winner_mode_index;
        av1_copy_array(ctx->blk_skip, txfm_info->blk_skip, ctx->num_4x4_blk);
        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
        rd_cost->rate = this_rate;
        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
        rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
        rd_cost->rdcost = this_rd;
        best_rd = this_rd;
        *best_skip2 = skip_blk;
      }
    }
  }
}
3471
/*!\cond */
// Collection of masks telling the inter mode search which prediction modes
// and reference-frame combinations to skip for the current block.
typedef struct {
  // Mask for each reference frame, specifying which prediction modes to NOT try
  // during search.
  uint32_t pred_modes[REF_FRAMES];
  // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
  // reference frames (i, j).
  // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
  // (NONE_FRAME).
  bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
} mode_skip_mask_t;
/*!\endcond */
3484
3485 // Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
disable_reference(MV_REFERENCE_FRAME ref,bool ref_combo[REF_FRAMES][REF_FRAMES+1])3486 static AOM_INLINE void disable_reference(
3487 MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3488 for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3489 ref_combo[ref][ref2 + 1] = true;
3490 }
3491 }
3492
3493 // Update 'ref_combo' mask to disable all inter references except ALTREF.
disable_inter_references_except_altref(bool ref_combo[REF_FRAMES][REF_FRAMES+1])3494 static AOM_INLINE void disable_inter_references_except_altref(
3495 bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3496 disable_reference(LAST_FRAME, ref_combo);
3497 disable_reference(LAST2_FRAME, ref_combo);
3498 disable_reference(LAST3_FRAME, ref_combo);
3499 disable_reference(GOLDEN_FRAME, ref_combo);
3500 disable_reference(BWDREF_FRAME, ref_combo);
3501 disable_reference(ALTREF2_FRAME, ref_combo);
3502 }
3503
// Reference-frame combinations allowed when the reduced reference set is in
// use (REF_SET_REDUCED). Each entry is {first ref, second ref}; NONE_FRAME as
// the second entry denotes single-reference (or intra) prediction.
static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
  { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
  { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
  { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
  { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
  { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
  { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
  { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
  { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
};

// Reference-frame combinations allowed in real-time mode (REF_SET_REALTIME):
// single-reference and intra prediction only.
static const MV_REFERENCE_FRAME real_time_ref_combos[][2] = {
  { LAST_FRAME, NONE_FRAME },
  { ALTREF_FRAME, NONE_FRAME },
  { GOLDEN_FRAME, NONE_FRAME },
  { INTRA_FRAME, NONE_FRAME }
};

// Selects which of the above reference-combination sets the search may use.
typedef enum { REF_SET_FULL, REF_SET_REDUCED, REF_SET_REALTIME } REF_SET;
3523
default_skip_mask(mode_skip_mask_t * mask,REF_SET ref_set)3524 static AOM_INLINE void default_skip_mask(mode_skip_mask_t *mask,
3525 REF_SET ref_set) {
3526 if (ref_set == REF_SET_FULL) {
3527 // Everything available by default.
3528 memset(mask, 0, sizeof(*mask));
3529 } else {
3530 // All modes available by default.
3531 memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
3532 // All references disabled first.
3533 for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
3534 for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3535 mask->ref_combo[ref1][ref2 + 1] = true;
3536 }
3537 }
3538 const MV_REFERENCE_FRAME(*ref_set_combos)[2];
3539 int num_ref_combos;
3540
3541 // Then enable reduced set of references explicitly.
3542 switch (ref_set) {
3543 case REF_SET_REDUCED:
3544 ref_set_combos = reduced_ref_combos;
3545 num_ref_combos =
3546 (int)sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]);
3547 break;
3548 case REF_SET_REALTIME:
3549 ref_set_combos = real_time_ref_combos;
3550 num_ref_combos =
3551 (int)sizeof(real_time_ref_combos) / sizeof(real_time_ref_combos[0]);
3552 break;
3553 default: assert(0); num_ref_combos = 0;
3554 }
3555
3556 for (int i = 0; i < num_ref_combos; ++i) {
3557 const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
3558 mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
3559 }
3560 }
3561 }
3562
// Builds the mode/reference skip mask for the current block. Starts from the
// default mask for the active reference set, then: disables references that
// are unavailable or excluded by segmentation, prunes fixed-MV modes for
// references with poor pred-MV SAD, applies src-alt-ref restrictions, applies
// the alt_ref_search_fp speed feature, disables intra for oversized blocks,
// and applies the intra y-mode mask.
static AOM_INLINE void init_mode_skip_mask(mode_skip_mask_t *mask,
                                           const AV1_COMP *cpi, MACROBLOCK *x,
                                           BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const SPEED_FEATURES *const sf = &cpi->sf;
  REF_SET ref_set = REF_SET_FULL;

  if (sf->rt_sf.use_real_time_ref_set)
    ref_set = REF_SET_REALTIME;
  else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
    ref_set = REF_SET_REDUCED;

  default_skip_mask(mask, ref_set);

  // Find the smallest pred-MV SAD over the references the search will use;
  // this is the baseline for the "poor reference" pruning below.
  int min_pred_mv_sad = INT_MAX;
  MV_REFERENCE_FRAME ref_frame;
  if (ref_set == REF_SET_REALTIME) {
    // For real-time encoding, we only look at a subset of ref frames. So the
    // threshold for pruning should be computed from this subset as well.
    const int num_rt_refs =
        sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
    for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
      if (ref != INTRA_FRAME) {
        min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
      }
    }
  } else {
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
      min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
  }

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
      // Skip checking missing reference in both single and compound reference
      // modes.
      disable_reference(ref_frame, mask->ref_combo);
    } else {
      // Skip fixed mv modes for poor references
      if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
        mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
      }
    }
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
      // Reference not used for the segment.
      disable_reference(ref_frame, mask->ref_combo);
    }
  }
  // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
  // is disabled for this segment. This is to prevent the possibility that we
  // end up unable to pick any mode.
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
    // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
    // unless ARNR filtering is enabled in which case we want
    // an unfiltered alternative. We allow near/nearest as well
    // because they may result in zero-zero MVs but be cheaper.
    if (cpi->rc.is_src_frame_alt_ref &&
        (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
      disable_inter_references_except_altref(mask->ref_combo);

      mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
      const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
      int_mv near_mv, nearest_mv, global_mv;
      get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
                  &x->mbmi_ext);
      get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
      get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);

      // Re-disable NEAR/NEAREST when they differ from the global MV (only
      // zero-zero-equivalent candidates stay enabled).
      if (near_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
      if (nearest_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
    }
  }

  if (cpi->rc.is_src_frame_alt_ref) {
    if (sf->inter_sf.alt_ref_search_fp &&
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
      mask->pred_modes[ALTREF_FRAME] = 0;
      disable_inter_references_except_altref(mask->ref_combo);
      disable_reference(INTRA_FRAME, mask->ref_combo);
    }
  }

  if (sf->inter_sf.alt_ref_search_fp) {
    if (!cm->show_frame && x->best_pred_mv_sad < INT_MAX) {
      int sad_thresh = x->best_pred_mv_sad + (x->best_pred_mv_sad >> 3);
      // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
      // those are past frames
      for (ref_frame = BWDREF_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
        if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
            0)
          if (x->pred_mv_sad[ref_frame] > sad_thresh)
            mask->pred_modes[ref_frame] |= INTER_ALL;
      }
    }
  }

  if (bsize > sf->part_sf.max_intra_bsize) {
    disable_reference(INTRA_FRAME, mask->ref_combo);
  }

  // Disable the intra y-modes not allowed for this block's max tx size.
  mask->pred_modes[INTRA_FRAME] |=
      ~(sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]]);
}
3673
init_neighbor_pred_buf(const OBMCBuffer * const obmc_buffer,HandleInterModeArgs * const args,int is_hbd)3674 static AOM_INLINE void init_neighbor_pred_buf(
3675 const OBMCBuffer *const obmc_buffer, HandleInterModeArgs *const args,
3676 int is_hbd) {
3677 if (is_hbd) {
3678 const int len = sizeof(uint16_t);
3679 args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
3680 args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
3681 (MAX_SB_SQUARE >> 1) * len);
3682 args->above_pred_buf[2] =
3683 CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
3684 args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
3685 args->left_pred_buf[1] =
3686 CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
3687 args->left_pred_buf[2] =
3688 CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
3689 } else {
3690 args->above_pred_buf[0] = obmc_buffer->above_pred;
3691 args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
3692 args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
3693 args->left_pred_buf[0] = obmc_buffer->left_pred;
3694 args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
3695 args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
3696 }
3697 }
3698
prune_ref_frame(const AV1_COMP * cpi,const MACROBLOCK * x,MV_REFERENCE_FRAME ref_frame)3699 static AOM_INLINE int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
3700 MV_REFERENCE_FRAME ref_frame) {
3701 const AV1_COMMON *const cm = &cpi->common;
3702 MV_REFERENCE_FRAME rf[2];
3703 av1_set_ref_frame(rf, ref_frame);
3704
3705 if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;
3706
3707 if (prune_ref_by_selective_ref_frame(cpi, x, rf,
3708 cm->cur_frame->ref_display_order_hint)) {
3709 return 1;
3710 }
3711
3712 return 0;
3713 }
3714
is_ref_frame_used_by_compound_ref(int ref_frame,int skip_ref_frame_mask)3715 static AOM_INLINE int is_ref_frame_used_by_compound_ref(
3716 int ref_frame, int skip_ref_frame_mask) {
3717 for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
3718 if (!(skip_ref_frame_mask & (1 << r))) {
3719 const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
3720 if (rf[0] == ref_frame || rf[1] == ref_frame) {
3721 return 1;
3722 }
3723 }
3724 }
3725 return 0;
3726 }
3727
is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,const MB_MODE_INFO * mi_cache)3728 static AOM_INLINE int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
3729 const MB_MODE_INFO *mi_cache) {
3730 if (!mi_cache) {
3731 return 0;
3732 }
3733
3734 if (ref_frame < REF_FRAMES) {
3735 return (ref_frame == mi_cache->ref_frame[0] ||
3736 ref_frame == mi_cache->ref_frame[1]);
3737 }
3738
3739 // if we are here, then the current mode is compound.
3740 MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame);
3741 return ref_frame == cached_ref_type;
3742 }
3743
// Please add/modify parameter setting in this function, making it consistent
// and easy to read and maintain.
//
// Per-block setup for inter mode RD search: initializes OBMC neighbor
// prediction buffers, reference-frame costs, single- and compound-reference
// MV candidate lists, OBMC target weighted prediction (when applicable), the
// mode skip mask, and the mode-evaluation parameters.
static AOM_INLINE void set_params_rd_pick_inter_mode(
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
    unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
    struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  unsigned char segment_id = mbmi->segment_id;

  init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
  av1_collect_neighbors_ref_counts(xd);
  estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  x->best_pred_mv_sad = INT_MAX;

  // Single-reference setup: reset per-reference state, then build MV
  // candidates for each available reference (unless masked out).
  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
       ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    mbmi_ext->mode_context[ref_frame] = 0;
    // UINT8_MAX marks an uninitialized ref MV list.
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
      // Skip the ref frame if the mask says skip and the ref is not used by
      // compound ref.
      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
    }
    // Store the best pred_mv_sad across all past frames
    if (cpi->sf.inter_sf.alt_ref_search_fp &&
        cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] < 0)
      x->best_pred_mv_sad =
          AOMMIN(x->best_pred_mv_sad, x->pred_mv_sad[ref_frame]);
  }

  // Compound-reference setup (skipped entirely for the RT ref set).
  if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
    // No second reference on RT ref set, so no need to initialize
    for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
         ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
      mbmi_ext->mode_context[ref_frame] = 0;
      mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
      const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
      // Both constituent references must be available.
      if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
            (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
        continue;
      }

      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      // Ref mv list population is not required, when compound references are
      // pruned.
      if (prune_ref_frame(cpi, x, ref_frame)) continue;

      av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                       xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                       mbmi_ext->mode_context);
      // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
      // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
      av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
    }
  }

  // OBMC setup: build above/left neighbor predictions and the target
  // weighted prediction, unless OBMC is disabled or pruned by frame stats.
  av1_count_overlappable_neighbors(cm, xd);
  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  const int prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
                         cpi->sf.inter_sf.prune_obmc_prob_thresh;
  if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
    if (check_num_overlappable_neighbors(mbmi) &&
        is_motion_variation_allowed_bsize(bsize)) {
      int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                       MAX_SB_SIZE >> 1 };
      int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                        MAX_SB_SIZE >> 1 };
      int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
                                          dst_width1, dst_height1,
                                          args->above_pred_stride);
      av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
                                         dst_width2, dst_height2,
                                         args->left_pred_stride);
      const int num_planes = av1_num_planes(cm);
      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
                           mi_col, 0, num_planes);
      calc_target_weighted_pred(
          cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
          args->left_pred_buf[0], args->left_pred_stride[0]);
    }
  }

  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);

  // Set params for mode evaluation
  set_mode_eval_params(cpi, x, MODE_EVAL);

  x->comp_rd_stats_idx = 0;

  for (int idx = 0; idx < REF_FRAMES; idx++) {
    args->best_single_sse_in_refs[idx] = INT32_MAX;
  }
}
3858
init_inter_mode_search_state(InterModeSearchState * search_state,const AV1_COMP * cpi,const MACROBLOCK * x,BLOCK_SIZE bsize,int64_t best_rd_so_far)3859 static AOM_INLINE void init_inter_mode_search_state(
3860 InterModeSearchState *search_state, const AV1_COMP *cpi,
3861 const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
3862 init_intra_mode_search_state(&search_state->intra_search_state);
3863 av1_invalid_rd_stats(&search_state->best_y_rdcost);
3864
3865 search_state->best_rd = best_rd_so_far;
3866 search_state->best_skip_rd[0] = INT64_MAX;
3867 search_state->best_skip_rd[1] = INT64_MAX;
3868
3869 av1_zero(search_state->best_mbmode);
3870
3871 search_state->best_rate_y = INT_MAX;
3872
3873 search_state->best_rate_uv = INT_MAX;
3874
3875 search_state->best_mode_skippable = 0;
3876
3877 search_state->best_skip2 = 0;
3878
3879 search_state->best_mode_index = THR_INVALID;
3880
3881 const MACROBLOCKD *const xd = &x->e_mbd;
3882 const MB_MODE_INFO *const mbmi = xd->mi[0];
3883 const unsigned char segment_id = mbmi->segment_id;
3884
3885 search_state->num_available_refs = 0;
3886 memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
3887 memset(search_state->dist_order_refs, -1,
3888 sizeof(search_state->dist_order_refs));
3889
3890 for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
3891 search_state->mode_threshold[i] = 0;
3892 const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
3893 for (int i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
3894 search_state->mode_threshold[i] =
3895 ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
3896 RD_THRESH_FAC_FRAC_BITS;
3897
3898 search_state->best_intra_rd = INT64_MAX;
3899
3900 search_state->best_pred_sse = UINT_MAX;
3901
3902 av1_zero(search_state->single_newmv);
3903 av1_zero(search_state->single_newmv_rate);
3904 av1_zero(search_state->single_newmv_valid);
3905 for (int i = 0; i < MB_MODE_COUNT; ++i) {
3906 for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
3907 for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
3908 search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
3909 search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
3910 }
3911 }
3912 }
3913
3914 for (int dir = 0; dir < 2; ++dir) {
3915 for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
3916 for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
3917 SingleInterModeState *state;
3918
3919 state = &search_state->single_state[dir][mode][ref_frame];
3920 state->ref_frame = NONE_FRAME;
3921 state->rd = INT64_MAX;
3922
3923 state = &search_state->single_state_modelled[dir][mode][ref_frame];
3924 state->ref_frame = NONE_FRAME;
3925 state->rd = INT64_MAX;
3926 }
3927 }
3928 }
3929 for (int dir = 0; dir < 2; ++dir) {
3930 for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
3931 for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
3932 search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
3933 }
3934 }
3935 }
3936 for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
3937 search_state->best_single_rd[ref_frame] = INT64_MAX;
3938 search_state->best_single_mode[ref_frame] = MB_MODE_COUNT;
3939 }
3940 av1_zero(search_state->single_state_cnt);
3941 av1_zero(search_state->single_state_modelled_cnt);
3942
3943 for (int i = 0; i < REFERENCE_MODES; ++i) {
3944 search_state->best_pred_rd[i] = INT64_MAX;
3945 }
3946 }
3947
mask_says_skip(const mode_skip_mask_t * mode_skip_mask,const MV_REFERENCE_FRAME * ref_frame,const PREDICTION_MODE this_mode)3948 static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
3949 const MV_REFERENCE_FRAME *ref_frame,
3950 const PREDICTION_MODE this_mode) {
3951 if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
3952 return true;
3953 }
3954
3955 return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
3956 }
3957
// Returns 1 when (curr_mode, ref_frames) cannot legally be coded for this
// block, 0 otherwise.
static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
                                      BLOCK_SIZE bsize,
                                      PREDICTION_MODE curr_mode,
                                      const MV_REFERENCE_FRAME *ref_frames) {
  const int is_comp_pred = ref_frames[1] > INTRA_FRAME;

  if (is_comp_pred) {
    // Compound prediction requires a block size that allows it and a second
    // reference that is actually enabled for this frame.
    if (!is_comp_ref_allowed(bsize)) return 1;
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]]))
      return 1;

    const AV1_COMMON *const cm = &cpi->common;
    // No compound modes in intra-only frames or SINGLE_REFERENCE frames.
    if (frame_is_intra_only(cm)) return 1;
    if (cm->current_frame.reference_mode == SINGLE_REFERENCE) return 1;

    // Do not allow compound prediction if the segment level reference frame
    // feature is in use, as in that case there can only be one reference.
    const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
    if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
  }

  // Inter-intra: both the block size and the inter mode must support it.
  if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME) {
    if (!is_interintra_allowed_bsize(bsize)) return 1;
    if (!is_interintra_allowed_mode(curr_mode)) return 1;
  }

  return 0;
}
3990
// ORs together the per-mi picked-reference masks covered by this block
// inside its superblock.
static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
                                        BLOCK_SIZE bsize, int mib_size) {
  const int sb_size_mask = mib_size - 1;
  const MACROBLOCKD *const xd = &x->e_mbd;
  // Position of this block inside its superblock, in mi units.
  const int row_in_sb = xd->mi_row & sb_size_mask;
  const int col_in_sb = xd->mi_col & sb_size_mask;
  const int block_rows = mi_size_high[bsize];
  const int block_cols = mi_size_wide[bsize];
  int mask = 0;
  // picked_ref_frames_mask is stored with a fixed row stride of 32 mi units.
  for (int r = row_in_sb; r < row_in_sb + block_rows; ++r) {
    for (int c = col_in_sb; c < col_in_sb + block_cols; ++c) {
      mask |= x->picked_ref_frames_mask[r * 32 + c];
    }
  }
  return mask;
}
4009
4010 // Check if reference frame pair of the current block matches with the given
4011 // block.
match_ref_frame_pair(const MB_MODE_INFO * mbmi,const MV_REFERENCE_FRAME * ref_frames)4012 static INLINE int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
4013 const MV_REFERENCE_FRAME *ref_frames) {
4014 return ((ref_frames[0] == mbmi->ref_frame[0]) &&
4015 (ref_frames[1] == mbmi->ref_frame[1]));
4016 }
4017
// Order-independent pruning of a (mode, ref_frame) candidate.
// Case 1: return 0, means don't skip this mode
// Case 2: return 1, means skip this mode completely
// Case 3: return 2, means skip compound only, but still try single motion modes
static int inter_mode_search_order_independent_skip(
    const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
    InterModeSearchState *search_state, int skip_ref_frame_mask,
    PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
  // Honor the precomputed mode/reference-combination skip masks first.
  if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
    return 1;
  }

  // Speed-feature based reference-frame pruning.
  const int ref_type = av1_ref_frame_type(ref_frame);
  if (prune_ref_frame(cpi, x, ref_type)) return 1;

  // This is only used in motion vector unit test.
  if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
      ref_frame[0] == INTRA_FRAME)
    return 1;

  // Skip modes whose MVs duplicate ones already evaluated.
  const AV1_COMMON *const cm = &cpi->common;
  if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
    return 1;
  }

  // Reuse the prediction mode in cache
  if (x->use_mb_mode_cache) {
    const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
    const PREDICTION_MODE cached_mode = cached_mi->mode;
    const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
    const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;

    // If the cached mode is intra, then we just need to match the mode.
    if (is_mode_intra(cached_mode) && mode != cached_mode) {
      return 1;
    }

    // If the cached mode is single inter mode, then we match the mode and
    // reference frame.
    if (cached_mode_is_single) {
      if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
        return 1;
      }
    } else {
      // If the cached mode is compound, then we need to consider several cases.
      const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
      if (mode_is_single) {
        // If the mode is single, we know the modes can't match. But we might
        // still want to search it if compound mode depends on the current mode.
        int skip_motion_mode_only = 0;
        if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
        } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
        } else if (cached_mode == NEW_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
                                   ref_frame[0] == cached_frame[1]);
        }

        // 1 -> skip entirely; 2 -> skip motion-mode search only.
        return 1 + skip_motion_mode_only;
      } else {
        // If both modes are compound, then everything must match.
        if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
            ref_frame[1] != cached_frame[1]) {
          return 1;
        }
      }
    }
  }

  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
  // If no valid mode has been found so far in PARTITION_NONE when finding a
  // valid partition is required, do not skip mode.
  if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
      x->must_find_valid_partition)
    return 0;

  const SPEED_FEATURES *const sf = &cpi->sf;
  // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
  // frames
  if (sf->inter_sf.prune_nearmv_using_neighbors &&
      (mode == NEAR_NEARMV || mode == NEARMV)) {
    const MACROBLOCKD *const xd = &x->e_mbd;
    if (search_state->best_rd != INT64_MAX && xd->left_available &&
        xd->up_available) {
      // Required neighbor matches, indexed by
      // [pruning level - 1][qindex sub-range].
      const int thresholds[PRUNE_NEARMV_MAX][3] = { { 1, 0, 0 },
                                                    { 1, 1, 0 },
                                                    { 2, 1, 0 } };
      const int qindex_sub_range = x->qindex * 3 / QINDEX_RANGE;

      assert(sf->inter_sf.prune_nearmv_using_neighbors <= PRUNE_NEARMV_MAX &&
             qindex_sub_range < 3);
      const int num_ref_frame_pair_match_thresh =
          thresholds[sf->inter_sf.prune_nearmv_using_neighbors - 1]
                    [qindex_sub_range];

      assert(num_ref_frame_pair_match_thresh <= 2 &&
             num_ref_frame_pair_match_thresh >= 0);
      int num_ref_frame_pair_match = 0;

      // Count how many of the two neighbors use the same ref-frame pair.
      num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
      num_ref_frame_pair_match +=
          match_ref_frame_pair(xd->above_mbmi, ref_frame);

      // Pruning based on ref frame pair match with neighbors.
      if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
    }
  }

  int skip_motion_mode = 0;
  if (mbmi->partition != PARTITION_NONE) {
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
    if (ref_type <= ALTREF_FRAME && skip_ref) {
      // Since the compound ref modes depends on the motion estimation result of
      // two single ref modes (best mv of single ref modes as the start point),
      // if current single ref mode is marked skip, we need to check if it will
      // be used in compound ref modes.
      if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
        // Found a not skipped compound ref mode which contains current
        // single ref. So this single ref can't be skipped completely
        // Just skip its motion mode search, still try its simple
        // transition mode.
        skip_motion_mode = 1;
        skip_ref = 0;
      }
    }
    // If we are reusing the prediction from cache, and the current frame is
    // required by the cache, then we cannot prune it.
    if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
      skip_ref = 0;
      // If the cache only needs the current reference type for compound
      // prediction, then we can skip motion mode search.
      skip_motion_mode = (ref_type <= ALTREF_FRAME &&
                          x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
    }
    if (skip_ref) return 1;
  }

  if (ref_frame[0] == INTRA_FRAME) {
    if (mode != DC_PRED) {
      // Disable intra modes other than DC_PRED for blocks with low variance
      // Threshold for intra skipping based on source variance
      // TODO(debargha): Specialize the threshold for super block sizes
      const unsigned int skip_intra_var_thresh = 64;
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
          x->source_variance < skip_intra_var_thresh)
        return 1;
    }
  }

  if (skip_motion_mode) return 2;

  return 0;
}
4171
init_mbmi(MB_MODE_INFO * mbmi,PREDICTION_MODE curr_mode,const MV_REFERENCE_FRAME * ref_frames,const AV1_COMMON * cm)4172 static INLINE void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
4173 const MV_REFERENCE_FRAME *ref_frames,
4174 const AV1_COMMON *cm) {
4175 PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4176 mbmi->ref_mv_idx = 0;
4177 mbmi->mode = curr_mode;
4178 mbmi->uv_mode = UV_DC_PRED;
4179 mbmi->ref_frame[0] = ref_frames[0];
4180 mbmi->ref_frame[1] = ref_frames[1];
4181 pmi->palette_size[0] = 0;
4182 pmi->palette_size[1] = 0;
4183 mbmi->filter_intra_mode_info.use_filter_intra = 0;
4184 mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4185 mbmi->motion_mode = SIMPLE_TRANSLATION;
4186 mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
4187 set_default_interp_filters(mbmi, cm->features.interp_filter);
4188 }
4189
collect_single_states(MACROBLOCK * x,InterModeSearchState * search_state,const MB_MODE_INFO * const mbmi)4190 static AOM_INLINE void collect_single_states(MACROBLOCK *x,
4191 InterModeSearchState *search_state,
4192 const MB_MODE_INFO *const mbmi) {
4193 int i, j;
4194 const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
4195 const PREDICTION_MODE this_mode = mbmi->mode;
4196 const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
4197 const int mode_offset = INTER_OFFSET(this_mode);
4198 const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
4199
4200 // Simple rd
4201 int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
4202 for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4203 const int64_t rd =
4204 search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
4205 if (rd < simple_rd) simple_rd = rd;
4206 }
4207
4208 // Insertion sort of single_state
4209 const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
4210 SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
4211 i = search_state->single_state_cnt[dir][mode_offset];
4212 for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
4213 state_s[j] = state_s[j - 1];
4214 state_s[j] = this_state_s;
4215 search_state->single_state_cnt[dir][mode_offset]++;
4216
4217 // Modelled rd
4218 int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
4219 for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4220 const int64_t rd =
4221 search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
4222 if (rd < modelled_rd) modelled_rd = rd;
4223 }
4224
4225 // Insertion sort of single_state_modelled
4226 const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
4227 SingleInterModeState *state_m =
4228 search_state->single_state_modelled[dir][mode_offset];
4229 i = search_state->single_state_modelled_cnt[dir][mode_offset];
4230 for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
4231 state_m[j] = state_m[j - 1];
4232 state_m[j] = this_state_m;
4233 search_state->single_state_modelled_cnt[dir][mode_offset]++;
4234 }
4235
// Prunes unlikely single-reference states collected by collect_single_states()
// and builds, per (direction, mode), a reference-frame ordering by increasing
// rd — simple rd entries first, then modelled rd entries as fill.
static AOM_INLINE void analyze_single_states(
    const AV1_COMP *cpi, InterModeSearchState *search_state) {
  const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
  assert(prune_level >= 1);
  int i, j, dir, mode;

  for (dir = 0; dir < 2; ++dir) {
    int64_t best_rd;
    SingleInterModeState(*state)[FWD_REFS];
    // Higher prune levels apply a tighter (larger) factor.
    const int prune_factor = prune_level >= 2 ? 6 : 5;

    // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
    // reference frames for all the modes (NEARESTMV and NEARMV may not
    // have same motion vectors). Always keep the best of each mode
    // because it might form the best possible combination with other mode.
    state = search_state->single_state[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      // Mark entries invalid whose rd exceeds best_rd by prune_factor/8.
      for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }

    // Apply the same pruning to the modelled-rd states.
    state = search_state->single_state_modelled[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }
  }

  // Ordering by simple rd first, then by modelled rd
  for (dir = 0; dir < 2; ++dir) {
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
      const int state_cnt_m =
          search_state->single_state_modelled_cnt[dir][mode];
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
      SingleInterModeState *state_m =
          search_state->single_state_modelled[dir][mode];
      int count = 0;
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
      // First pass: take valid references in simple-rd order.
      for (i = 0; i < state_cnt_s; ++i) {
        if (state_s[i].rd == INT64_MAX) break;
        if (state_s[i].valid) {
          search_state->single_rd_order[dir][mode][count++] =
              state_s[i].ref_frame;
        }
      }
      if (count >= max_candidates) continue;

      // Second pass: fill the remaining slots from the modelled-rd order,
      // skipping duplicates and references pruned in the simple-rd list.
      for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
        if (state_m[i].rd == INT64_MAX) break;
        if (!state_m[i].valid) continue;
        const int ref_frame = state_m[i].ref_frame;
        int match = 0;
        // Check if existing already
        for (j = 0; j < count; ++j) {
          if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
            match = 1;
            break;
          }
        }
        if (match) continue;
        // Check if this ref_frame is removed in simple rd
        int valid = 1;
        for (j = 0; j < state_cnt_s; ++j) {
          if (ref_frame == state_s[j].ref_frame) {
            valid = state_s[j].valid;
            break;
          }
        }
        if (valid) {
          search_state->single_rd_order[dir][mode][count++] = ref_frame;
        }
      }
    }
  }
}
4324
compound_skip_get_candidates(const AV1_COMP * cpi,const InterModeSearchState * search_state,const int dir,const PREDICTION_MODE mode)4325 static int compound_skip_get_candidates(
4326 const AV1_COMP *cpi, const InterModeSearchState *search_state,
4327 const int dir, const PREDICTION_MODE mode) {
4328 const int mode_offset = INTER_OFFSET(mode);
4329 const SingleInterModeState *state =
4330 search_state->single_state[dir][mode_offset];
4331 const SingleInterModeState *state_modelled =
4332 search_state->single_state_modelled[dir][mode_offset];
4333
4334 int max_candidates = 0;
4335 for (int i = 0; i < FWD_REFS; ++i) {
4336 if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
4337 max_candidates++;
4338 }
4339
4340 int candidates = max_candidates;
4341 if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
4342 candidates = AOMMIN(2, max_candidates);
4343 }
4344 if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
4345 if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
4346 state[0].ref_frame == state_modelled[0].ref_frame)
4347 candidates = 1;
4348 if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
4349 }
4350
4351 if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
4352 // Limit the number of candidates to 1 in each direction for compound
4353 // prediction
4354 candidates = AOMMIN(1, candidates);
4355 }
4356 return candidates;
4357 }
4358
// Decides whether a compound mode can be skipped based on the rd results of
// its two constituent single-reference modes.  Returns 1 to skip.
static int compound_skip_by_single_states(
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
  // Per-side single modes that make up this compound mode.
  const int mode[2] = { compound_ref0_mode(this_mode),
                        compound_ref1_mode(this_mode) };
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
  // Direction of each side: 0 for forward refs (<= GOLDEN), 1 for backward.
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
  int ref_searched[2] = { 0, 0 };
  int ref_mv_match[2] = { 1, 1 };
  int i, j;

  // Determine whether each side's single mode was actually searched for its
  // reference frame.
  for (i = 0; i < 2; ++i) {
    const SingleInterModeState *state =
        search_state->single_state[mode_dir[i]][mode_offset[i]];
    const int state_cnt =
        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
    for (j = 0; j < state_cnt; ++j) {
      if (state[j].ref_frame == refs[i]) {
        ref_searched[i] = 1;
        break;
      }
    }
  }

  // For NEARESTMV/NEARMV sides, the single-mode rd is only comparable when
  // the single and compound searches produce the same reference MVs.
  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
      continue;
    }
    const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
      int_mv single_mv;
      int_mv comp_mv;
      get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
                  &x->mbmi_ext);
      get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
      if (single_mv.as_int != comp_mv.as_int) {
        ref_mv_match[i] = 0;
        break;
      }
    }
  }

  // Skip the compound mode if either side's reference is not among the top
  // single-mode candidates for that side.
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || !ref_mv_match[i]) continue;
    const int candidates =
        compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
    const MV_REFERENCE_FRAME *ref_order =
        search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
    int match = 0;
    for (j = 0; j < candidates; ++j) {
      if (refs[i] == ref_order[j]) {
        match = 1;
        break;
      }
    }
    if (!match) return 1;
  }

  return 0;
}
4423
4424 // Check if ref frames of current block matches with given block.
match_ref_frame(const MB_MODE_INFO * const mbmi,const MV_REFERENCE_FRAME * ref_frames,int * const is_ref_match)4425 static INLINE void match_ref_frame(const MB_MODE_INFO *const mbmi,
4426 const MV_REFERENCE_FRAME *ref_frames,
4427 int *const is_ref_match) {
4428 if (is_inter_block(mbmi)) {
4429 is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
4430 is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
4431 if (has_second_ref(mbmi)) {
4432 is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
4433 is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
4434 }
4435 }
4436 }
4437
4438 // Prune compound mode using ref frames of neighbor blocks.
compound_skip_using_neighbor_refs(MACROBLOCKD * const xd,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,int prune_ext_comp_using_neighbors)4439 static INLINE int compound_skip_using_neighbor_refs(
4440 MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
4441 const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) {
4442 // Exclude non-extended compound modes from pruning
4443 if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4444 this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4445 return 0;
4446
4447 if (prune_ext_comp_using_neighbors >= 3) return 1;
4448
4449 int is_ref_match[2] = { 0 }; // 0 - match for forward refs
4450 // 1 - match for backward refs
4451 // Check if ref frames of this block matches with left neighbor.
4452 if (xd->left_available)
4453 match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
4454
4455 // Check if ref frames of this block matches with above neighbor.
4456 if (xd->up_available)
4457 match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
4458
4459 // Combine ref frame match with neighbors in forward and backward refs.
4460 const int track_ref_match = is_ref_match[0] + is_ref_match[1];
4461
4462 // Pruning based on ref frame match with neighbors.
4463 if (track_ref_match >= prune_ext_comp_using_neighbors) return 0;
4464 return 1;
4465 }
4466
4467 // Update best single mode for the given reference frame based on simple rd.
update_best_single_mode(InterModeSearchState * search_state,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME ref_frame,int64_t this_rd)4468 static INLINE void update_best_single_mode(InterModeSearchState *search_state,
4469 const PREDICTION_MODE this_mode,
4470 const MV_REFERENCE_FRAME ref_frame,
4471 int64_t this_rd) {
4472 if (this_rd < search_state->best_single_rd[ref_frame]) {
4473 search_state->best_single_rd[ref_frame] = this_rd;
4474 search_state->best_single_mode[ref_frame] = this_mode;
4475 }
4476 }
4477
4478 // Prune compound mode using best single mode for the same reference.
skip_compound_using_best_single_mode_ref(const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,const PREDICTION_MODE * best_single_mode,int prune_comp_using_best_single_mode_ref)4479 static INLINE int skip_compound_using_best_single_mode_ref(
4480 const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
4481 const PREDICTION_MODE *best_single_mode,
4482 int prune_comp_using_best_single_mode_ref) {
4483 // Exclude non-extended compound modes from pruning
4484 if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4485 this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4486 return 0;
4487
4488 assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
4489 const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
4490 // Get ref frame direction corresponding to NEWMV
4491 // 0 - NEWMV corresponding to forward direction
4492 // 1 - NEWMV corresponding to backward direction
4493 const int newmv_dir = comp_mode_ref0 != NEWMV;
4494
4495 // Avoid pruning the compound mode when ref frame corresponding to NEWMV
4496 // have NEWMV as single mode winner.
4497 // Example: For an extended-compound mode,
4498 // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
4499 // - Ref frame corresponding to NEWMV is ALTREF_FRAME
4500 // - Avoid pruning this mode, if best single mode corresponding to ref frame
4501 // ALTREF_FRAME is NEWMV
4502 const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
4503 if (single_mode == NEWMV) return 0;
4504
4505 // Avoid pruning the compound mode when best single mode is not available
4506 if (prune_comp_using_best_single_mode_ref == 1)
4507 if (single_mode == MB_MODE_COUNT) return 0;
4508 return 1;
4509 }
4510
// qsort comparator ordering int64_t values ascending.
// Returns -1, 0 or 1.  The (a > b) - (a < b) idiom avoids any
// overflow-prone subtraction and keeps the const qualifiers of the
// qsort contract intact (the original casts discarded const).
static int compare_int64(const void *a, const void *b) {
  const int64_t a64 = *(const int64_t *)a;
  const int64_t b64 = *(const int64_t *)b;
  return (a64 > b64) - (a64 < b64);
}
4522
// Records a new best mode: copies its rd stats, mode info, skip flags and
// transform bookkeeping into search_state, best_rd_stats_dst and ctx.
static INLINE void update_search_state(
    InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
    PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
    const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
    THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  // Intra modes never take the skip_txfm shortcut.
  const int skip_txfm =
      mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);
  const TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  search_state->best_rd = new_best_rd_stats->rdcost;
  search_state->best_mode_index = new_best_mode;
  *best_rd_stats_dst = *new_best_rd_stats;
  search_state->best_mbmode = *mbmi;
  search_state->best_skip2 = skip_txfm;
  search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
  // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
  // rate_uv because av1_txfm_search process is replaced by rd estimation.
  // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
  // These two values will be updated when av1_txfm_search is called.
  if (txfm_search_done) {
    search_state->best_rate_y =
        new_best_rd_stats_y->rate +
        x->mode_costs.skip_txfm_cost[skip_ctx]
                                    [new_best_rd_stats->skip_txfm || skip_txfm];
    search_state->best_rate_uv = new_best_rd_stats_uv->rate;
  }
  search_state->best_y_rdcost = *new_best_rd_stats_y;
  // Save the winner's per-4x4 transform-skip decisions and tx types.
  memcpy(ctx->blk_skip, txfm_info->blk_skip,
         sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
4557
4558 // Find the best RD for a reference frame (among single reference modes)
4559 // and store +10% of it in the 0-th element in ref_frame_rd.
find_top_ref(int64_t ref_frame_rd[REF_FRAMES])4560 static AOM_INLINE void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
4561 assert(ref_frame_rd[0] == INT64_MAX);
4562 int64_t ref_copy[REF_FRAMES - 1];
4563 memcpy(ref_copy, ref_frame_rd + 1,
4564 sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
4565 qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
4566
4567 int64_t cutoff = ref_copy[0];
4568 // The cut-off is within 10% of the best.
4569 if (cutoff != INT64_MAX) {
4570 assert(cutoff < INT64_MAX / 200);
4571 cutoff = (110 * cutoff) / 100;
4572 }
4573 ref_frame_rd[0] = cutoff;
4574 }
4575
4576 // Check if either frame is within the cutoff.
in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],MV_REFERENCE_FRAME frame1,MV_REFERENCE_FRAME frame2)4577 static INLINE bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
4578 MV_REFERENCE_FRAME frame1,
4579 MV_REFERENCE_FRAME frame2) {
4580 assert(frame2 > 0);
4581 return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
4582 ref_frame_rd[frame2] <= ref_frame_rd[0];
4583 }
4584
// Re-evaluates motion modes for the stored winner candidates that were
// originally searched with simple translation only (see handle_winner_cand).
// For each single-reference candidate, the prediction buffers are
// re-established, motion_mode_rd() is run, and the global search state is
// updated when the candidate beats the current best RD.
static AOM_INLINE void evaluate_motion_mode_for_winner_candidates(
    const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
    HandleInterModeArgs *const args, TileDataEnc *const tile_data,
    PICK_MODE_CONTEXT *const ctx,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    const motion_mode_best_st_candidate *const best_motion_mode_cands,
    int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
    InterModeSearchState *const search_state, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *const inter_modes_info = x->inter_modes_info;
  const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;

  for (int cand = 0; cand < num_best_cand; cand++) {
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    av1_init_rd_stats(&rd_stats);
    av1_init_rd_stats(&rd_stats_y);
    av1_init_rd_stats(&rd_stats_uv);
    int rate_mv;

    // Restore the candidate's mode info and rates saved during the initial
    // simple-translation search.
    rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
    args->skip_motion_mode =
        best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
    *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
    rd_stats.rate =
        best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;

    // Continue if the best candidate is compound.
    if (!is_inter_singleref_mode(mbmi->mode)) continue;

    x->txfm_search_info.skip_txfm = 0;
    // Snapshot the destination buffers so motion_mode_rd can restore them.
    struct macroblockd_plane *pd = xd->plane;
    const BUFFER_SET orig_dst = {
      { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
      { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
    };

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    // Initialize motion mode to simple translation
    // Calculation of switchable rate depends on it.
    mbmi->motion_mode = 0;
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    // Point the prediction planes at the reference buffers for this candidate.
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    int64_t skip_rd[2] = { search_state->best_skip_rd[0],
                           search_state->best_skip_rd[1] };
    int64_t this_yrd = INT64_MAX;
    int64_t ret_value = motion_mode_rd(
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
        search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
        do_tx_search, inter_modes_info, 1, &this_yrd);

    // INT64_MAX signals that the motion-mode search was not successful.
    if (ret_value != INT64_MAX) {
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(
          &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
          mode_enum, NULL, bsize, rd_stats.rdcost,
          cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
      if (rd_stats.rdcost < search_state->best_rd) {
        *yrd = this_yrd;
        update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                            &rd_stats_uv, mode_enum, x, do_tx_search);
        if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
      }
    }
  }
}
4662
/*!\cond */
// Arguments for speed feature pruning of inter mode search
typedef struct {
  // Pointer to the caller-owned flag signalling that motion-mode search
  // should be skipped for the current prediction mode; written by
  // skip_inter_mode().
  int *skip_motion_mode;
  // Masks of prediction modes / reference frames to skip.
  mode_skip_mask_t *mode_skip_mask;
  // Overall inter-mode search state threaded through the search.
  InterModeSearchState *search_state;
  // Bit mask of reference frames to skip (layout defined where the mask is
  // built — see inter_mode_search_order_independent_skip).
  int skip_ref_frame_mask;
  // Becomes 1 once the first compound mode is reached, at which point
  // single-reference statistics are analyzed for compound pruning.
  int reach_first_comp_mode;
  // Multiplier (in MODE_THRESH_QBITS fixed point) applied to mode thresholds
  // when the best mode so far is skippable; prunes more aggressively.
  int mode_thresh_mul_fact;
  // Count of single-reference modes processed so far, compared against
  // NUM_SINGLE_REF_MODES before computing the compound pruning cut-off.
  int num_single_modes_processed;
  // Set to 1 once find_top_ref() has computed the single-reference RD
  // cut-off in ref_frame_rd[0].
  int prune_cpd_using_sr_stats_ready;
} InterModeSFArgs;
/*!\endcond */
4676
// Decides whether the inter mode indexed by midx in av1_default_mode_order
// should be skipped. Returns 1 to skip the mode, 0 to evaluate it. As side
// effects it updates args->skip_motion_mode and the bookkeeping used for
// compound-mode pruning (see InterModeSFArgs).
static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
                           int64_t *ref_frame_rd, int midx,
                           InterModeSFArgs *args) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  // Get the actual prediction mode we are trying in this iteration
  const THR_MODES mode_enum = av1_default_mode_order[midx];
  const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
  const PREDICTION_MODE this_mode = mode_def->mode;
  const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
  const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
  const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
  const int comp_pred = second_ref_frame > INTRA_FRAME;

  // Intra modes are handled by a separate search.
  if (ref_frame == INTRA_FRAME) return 1;

  // Check if this mode should be skipped because it is incompatible with the
  // current frame
  if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
    return 1;
  const int ret = inter_mode_search_order_independent_skip(
      cpi, x, args->mode_skip_mask, args->search_state,
      args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
  if (ret == 1) return 1;
  // ret == 2 means: evaluate the mode but skip its motion-mode search.
  *(args->skip_motion_mode) = (ret == 2);

  // We've reached the first compound prediction mode, get stats from the
  // single reference predictors to help with pruning
  if (sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
      args->reach_first_comp_mode == 0) {
    analyze_single_states(cpi, args->search_state);
    args->reach_first_comp_mode = 1;
  }

  // Prune aggressively when best mode is skippable.
  // mul_fact is in MODE_THRESH_QBITS fixed point; the default (1 << QBITS)
  // leaves the threshold unchanged.
  int mul_fact = args->search_state->best_mode_skippable
                     ? args->mode_thresh_mul_fact
                     : (1 << MODE_THRESH_QBITS);
  int64_t mode_threshold =
      (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
      MODE_THRESH_QBITS;

  if (args->search_state->best_rd < mode_threshold) return 1;

  // Skip this compound mode based on the RD results from the single prediction
  // modes
  if (sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
    if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
                                       ref_frame, second_ref_frame, x))
      return 1;
  }

  if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
    // After we done with single reference modes, find the 2nd best RD
    // for a reference frame. Only search compound modes that have a reference
    // frame at least as good as the 2nd best.
    if (!args->prune_cpd_using_sr_stats_ready &&
        args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
      find_top_ref(ref_frame_rd);
      args->prune_cpd_using_sr_stats_ready = 1;
    }
    if (args->prune_cpd_using_sr_stats_ready &&
        !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
      return 1;
  }

  // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
  if (sf->inter_sf.skip_ext_comp_nearmv_mode &&
      (this_mode == NEW_NEARMV || this_mode == NEAR_NEWMV)) {
    return 1;
  }

  // Prune compound modes whose references do not match the neighbors'.
  if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
    if (compound_skip_using_neighbor_refs(
            xd, this_mode, ref_frames,
            sf->inter_sf.prune_ext_comp_using_neighbors))
      return 1;
  }

  if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
    if (skip_compound_using_best_single_mode_ref(
            this_mode, ref_frames, args->search_state->best_single_mode,
            sf->inter_sf.prune_comp_using_best_single_mode_ref))
      return 1;
  }

  if (sf->inter_sf.prune_nearest_near_mv_using_refmv_weight && !comp_pred) {
    const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
    if (skip_nearest_near_mv_using_refmv_weight(x, this_mode, ref_frame_type))
      return 1;
  }

  return 0;
}
4771
// Updates the best single-, compound-, and select-reference-mode RD records
// in search_state, given the rate/distortion of the mode just evaluated and
// the cost of signaling compound prediction.
static void record_best_compound(REFERENCE_MODE reference_mode,
                                 RD_STATS *rd_stats, int comp_pred, int rdmult,
                                 InterModeSearchState *search_state,
                                 int compmode_cost) {
  // Under REFERENCE_MODE_SELECT the mode rate already includes the
  // compound-mode flag, so remove it for the single-reference rate;
  // otherwise add it to form the hybrid rate.
  const int select_mode = (reference_mode == REFERENCE_MODE_SELECT);
  const int64_t single_rate =
      select_mode ? rd_stats->rate - compmode_cost : rd_stats->rate;
  const int64_t hybrid_rate =
      select_mode ? rd_stats->rate : rd_stats->rate + compmode_cost;

  const int64_t single_rd = RDCOST(rdmult, single_rate, rd_stats->dist);
  const int64_t hybrid_rd = RDCOST(rdmult, hybrid_rate, rd_stats->dist);

  // NOTE(review): single_rd is compared against both the SINGLE_REFERENCE and
  // the COMPOUND_REFERENCE record, matching long-standing behavior.
  const int ref_type = comp_pred ? COMPOUND_REFERENCE : SINGLE_REFERENCE;
  if (single_rd < search_state->best_pred_rd[ref_type])
    search_state->best_pred_rd[ref_type] = single_rd;

  if (hybrid_rd < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
    search_state->best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
}
4799
// Does a transform search over a list of the best inter mode candidates.
// This is called if the original mode search computed an RD estimate
// for the transform search rather than doing a full search.
static void tx_search_best_inter_candidates(
    AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    int64_t best_rd_so_far, BLOCK_SIZE bsize,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
    InterModeSearchState *search_state, RD_STATS *rd_cost,
    PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const ModeCosts *mode_costs = &x->mode_costs;
  const int num_planes = av1_num_planes(cm);
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *inter_modes_info = x->inter_modes_info;
  // Sort candidates so the best RD estimate comes first.
  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
  search_state->best_rd = best_rd_so_far;
  search_state->best_mode_index = THR_INVALID;
  // Initialize best mode stats for winner mode processing
  x->winner_mode_count = 0;
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
                          NULL, bsize, best_rd_so_far,
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
  // Cap the candidate count by the real-time speed-feature limit.
  inter_modes_info->num =
      inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
          ? inter_modes_info->num
          : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
  const int64_t top_est_rd =
      inter_modes_info->num > 0
          ? inter_modes_info
                ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
          : INT64_MAX;
  *yrd = INT64_MAX;
  int64_t best_rd_in_this_partition = INT64_MAX;
  int num_inter_mode_cands = inter_modes_info->num;
  int newmv_mode_evaled = 0;
  int max_allowed_cands = INT_MAX;
  if (cpi->sf.inter_sf.limit_inter_mode_cands) {
    // The bound on the no. of inter mode candidates, beyond which the
    // candidates are limited if a newmv mode got evaluated, is set as
    // max_allowed_cands + 1.
    const int num_allowed_cands[4] = { INT_MAX, 10, 9, 6 };
    assert(cpi->sf.inter_sf.limit_inter_mode_cands <= 3);
    max_allowed_cands =
        num_allowed_cands[cpi->sf.inter_sf.limit_inter_mode_cands];
  }

  int num_mode_thresh = INT_MAX;
  if (cpi->sf.inter_sf.limit_txfm_eval_per_mode) {
    // Bound the no. of transform searches per prediction mode beyond a
    // threshold.
    const int num_mode_thresh_ary[3] = { INT_MAX, 4, 3 };
    assert(cpi->sf.inter_sf.limit_txfm_eval_per_mode <= 2);
    num_mode_thresh =
        num_mode_thresh_ary[cpi->sf.inter_sf.limit_txfm_eval_per_mode];
  }

  int num_tx_cands = 0;
  // Per-prediction-mode count of transform searches performed.
  int num_tx_search_modes[INTER_MODE_END - INTER_MODE_START] = { 0 };
  // Iterate over best inter mode candidates and perform tx search
  for (int j = 0; j < num_inter_mode_cands; ++j) {
    const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
    int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
    // The list is sorted; stop once the estimate exceeds the best estimate
    // by 25% (0.80 * curr > top <=> curr > 1.25 * top).
    if (curr_est_rd * 0.80 > top_est_rd) break;

    // Past the per-mode threshold, allow at most one tx search per mode
    // (two for NEARESTMV).
    if (num_tx_cands > num_mode_thresh) {
      if ((mbmi->mode != NEARESTMV &&
           num_tx_search_modes[mbmi->mode - INTER_MODE_START] >= 1) ||
          (mbmi->mode == NEARESTMV &&
           num_tx_search_modes[mbmi->mode - INTER_MODE_START] >= 2))
        continue;
    }

    txfm_info->skip_txfm = 0;
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Select prediction reference frames.
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    // Build the prediction for this mode
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                  av1_num_planes(cm) - 1);
    if (mbmi->motion_mode == OBMC_CAUSAL) {
      av1_build_obmc_inter_predictors_sb(cm, xd);
    }

    // Initialize RD stats
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
    int64_t skip_rd = INT64_MAX;
    if (cpi->sf.inter_sf.txfm_rd_gate_level) {
      // Check if the mode is good enough based on skip RD
      int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
      skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
      int eval_txfm =
          check_txfm_eval(x, bsize, search_state->best_skip_rd[0], skip_rd,
                          cpi->sf.inter_sf.txfm_rd_gate_level, 0);
      if (!eval_txfm) continue;
    }

    num_tx_cands++;
    if (have_newmv_in_inter_mode(mbmi->mode)) newmv_mode_evaled = 1;
    num_tx_search_modes[mbmi->mode - INTER_MODE_START]++;
    int64_t this_yrd = INT64_MAX;
    // Do the transform search
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
                         mode_rate, search_state->best_rd)) {
      continue;
    } else {
      // Compute the luma-only RD for this candidate.
      const int y_rate =
          rd_stats.skip_txfm
              ? mode_costs->skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);

      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
            rd_stats_y.rate + rd_stats_uv.rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }
    rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
    if (rd_stats.rdcost < best_rd_in_this_partition) {
      best_rd_in_this_partition = rd_stats.rdcost;
      *yrd = this_yrd;
    }

    const THR_MODES mode_enum = get_prediction_mode_idx(
        mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Collect mode stats for multiwinner mode processing
    const int txfm_search_done = 1;
    store_winner_mode_stats(
        &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
        NULL, bsize, rd_stats.rdcost,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);

    if (rd_stats.rdcost < search_state->best_rd) {
      update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                          &rd_stats_uv, mode_enum, x, txfm_search_done);
      search_state->best_skip_rd[0] = skip_rd;
      // Limit the total number of modes to be evaluated if the first is valid
      // and transform skip or compound
      if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
        if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
          // Evaluate more candidates at high quantizers where occurrence of
          // transform skip is high.
          const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
          const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands =
              AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
        } else if (!j && has_second_ref(&search_state->best_mbmode)) {
          const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
          // Evaluate more candidates at low quantizers where occurrence of
          // single reference mode is high.
          const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
                                                { 10, 7, 5, 3 } };
          const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands = AOMMIN(
              max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
        }
      }
    }
    // If the number of candidates evaluated exceeds max_allowed_cands, break if
    // a newmv mode was evaluated already.
    if ((num_tx_cands > max_allowed_cands) && newmv_mode_evaled) break;
  }
}
4978
// Indicates number of winner simple translation modes to be used
// (presumably indexed by the motion_mode_for_winner_cand speed-feature
// level, with level 0 disabling the feature — TODO confirm against the
// speed-feature settings).
static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
4981
// Adds a motion mode to the candidate list for motion_mode_for_winner_cand
// speed feature. This list consists of modes that have only searched
// SIMPLE_TRANSLATION. The final list will be used to search other motion
// modes after the initial RD search.
// The list is kept sorted by ascending rd_cost and capped at
// max_winner_motion_mode_cand entries.
static void handle_winner_cand(
    MB_MODE_INFO *const mbmi,
    motion_mode_best_st_candidate *best_motion_mode_cands,
    int max_winner_motion_mode_cand, int64_t this_rd,
    motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
  // Number of current motion mode candidates in list
  const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
  int valid_motion_mode_cand_loc = num_motion_mode_cand;

  // find the best location to insert new motion mode candidate
  for (int j = 0; j < num_motion_mode_cand; j++) {
    if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
      valid_motion_mode_cand_loc = j;
      break;
    }
  }

  // Insert motion mode if location is found
  if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
    // Shift worse candidates down one slot; when the list is at capacity the
    // last entry is dropped (the shifted span is clamped to capacity - 1).
    if (num_motion_mode_cand > 0 &&
        valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
      memmove(
          &best_motion_mode_cands
               ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
          &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
          (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
           valid_motion_mode_cand_loc) *
              sizeof(best_motion_mode_cands->motion_mode_cand[0]));
    motion_mode_cand->mbmi = *mbmi;
    motion_mode_cand->rd_cost = this_rd;
    motion_mode_cand->skip_motion_mode = skip_motion_mode;
    best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
        *motion_mode_cand;
    best_motion_mode_cands->num_motion_mode_cand =
        AOMMIN(max_winner_motion_mode_cand,
               best_motion_mode_cands->num_motion_mode_cand + 1);
  }
}
5024
5025 /*!\brief Search intra modes in interframes
5026 *
5027 * \ingroup intra_mode_search
5028 *
5029 * This function searches for the best intra mode when the current frame is an
5030 * interframe. This function however does *not* handle luma palette mode.
5031 * Palette mode is currently handled by \ref av1_search_palette_mode.
5032 *
5033 * This function will first iterate through the luma mode candidates to find the
5034 * best luma intra mode. Once the best luma mode it's found, it will then search
5035 * for the best chroma mode. Because palette mode is currently not handled by
5036 * here, a cache of uv mode is stored in
5037 * InterModeSearchState::intra_search_state so it can be reused later by \ref
5038 * av1_search_palette_mode.
5039 *
 * \return Nothing is returned directly. Instead, the corresponding values in
 * x->e_mbd.mi[0], rd_stats, rd_stats_y/uv, and best_intra_rd are updated in
 * place. Moreover, in the first invocation of the function, the chroma intra
 * mode result is cached in intra_search_state to be used in subsequent calls.
 * In the first evaluation with directional mode, a prune_mask computed with
 * histogram of gradient is also stored in intra_search_state.
5047 *
5048 * \param[in,out] search_state Struct keep track of the prediction mode
5049 * search state in interframe.
5050 *
5051 * \param[in] cpi Top-level encoder structure.
5052 * \param[in] x Pointer to struct holding all the data for
5053 * the current prediction block.
5054 * \param[out] rd_cost Stores the best rd_cost among all the
5055 * prediction modes searched.
5056 * \param[in] bsize Current block size.
5057 * \param[in,out] ctx Structure to hold the number of 4x4 blks to
5058 * copy the tx_type and txfm_skip arrays.
5059 * for only the Y plane.
5060 * \param[in,out] sf_args Stores the list of intra mode candidates
5061 * to be searched.
5062 * \param[in] intra_ref_frame_cost The entropy cost for signaling that the
5063 * current ref frame is an intra frame.
5064 * \param[in] yrd_threshold The rdcost threshold for luma intra mode to
5065 * terminate chroma intra mode search.
5066 *
 * \remark This function returns no value; all results are communicated
 * through the in/out parameters documented above.
5070 */
static AOM_INLINE void search_intra_modes_in_interframe(
    InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
    InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
    int64_t yrd_threshold) {
  const AV1_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  IntraModeSearchState *intra_search_state = &search_state->intra_search_state;

  // Tracking for the best luma intra mode found so far.
  int is_best_y_mode_intra = 0;
  RD_STATS best_intra_rd_stats_y;
  int64_t best_rd_y = INT64_MAX;
  int best_mode_cost_y = -1;
  MB_MODE_INFO best_mbmi = *xd->mi[0];
  THR_MODES best_mode_enum = THR_INVALID;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int num_4x4 = bsize_to_num_blk(bsize);

  // Performs luma search
  int64_t best_model_rd = INT64_MAX;
  int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
  for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
    top_intra_model_rd[i] = INT64_MAX;
  }
  for (int mode_idx = 0; mode_idx < LUMA_MODE_COUNT; ++mode_idx) {
    // The skip flag may be set mid-search (e.g. by the ML pruning model).
    if (sf->intra_sf.skip_intra_in_interframe &&
        search_state->intra_search_state.skip_intra_modes)
      break;
    set_y_mode_and_delta_angle(mode_idx, mbmi);
    assert(mbmi->mode < INTRA_MODE_END);

    // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
    if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
      continue;

    const THR_MODES mode_enum =
        get_prediction_mode_idx(mbmi->mode, INTRA_FRAME, NONE_FRAME);
    // Honor the configuration/speed-feature switches for smooth and Paeth
    // prediction, and for angle deltas on directional modes.
    if ((!cpi->oxcf.intra_mode_cfg.enable_smooth_intra ||
         cpi->sf.intra_sf.disable_smooth_intra) &&
        (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
         mbmi->mode == SMOOTH_V_PRED))
      continue;
    if (!cpi->oxcf.intra_mode_cfg.enable_paeth_intra &&
        mbmi->mode == PAETH_PRED)
      continue;
    if (av1_is_directional_mode(mbmi->mode) &&
        av1_use_angle_delta(bsize) == 0 && mbmi->angle_delta[PLANE_TYPE_Y] != 0)
      continue;
    const PREDICTION_MODE this_mode = mbmi->mode;

    assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
    assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
    init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
    x->txfm_search_info.skip_txfm = 0;

    if (this_mode != DC_PRED) {
      // Only search the oblique modes if the best so far is
      // one of the neighboring directional modes
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
          (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
        if (search_state->best_mode_index != THR_INVALID &&
            search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
          continue;
      }
      if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(
                this_mode, search_state->intra_search_state.best_intra_mode))
          continue;
      }
    }

    RD_STATS intra_rd_stats_y;
    int mode_cost_y;
    int64_t intra_rd_y = INT64_MAX;
    const int is_luma_result_valid = av1_handle_intra_y_mode(
        intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
        &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
        &best_model_rd, top_intra_model_rd);
    // Only keep modes whose luma RD beats the caller-supplied threshold.
    if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
      is_best_y_mode_intra = 1;
      if (intra_rd_y < best_rd_y) {
        best_intra_rd_stats_y = intra_rd_stats_y;
        best_mode_cost_y = mode_cost_y;
        best_rd_y = intra_rd_y;
        best_mbmi = *mbmi;
        best_mode_enum = mode_enum;
        memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
               sizeof(best_blk_skip[0]) * num_4x4);
        av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
      }
    }
  }

  if (!is_best_y_mode_intra) {
    return;
  }

  assert(best_rd_y < INT64_MAX);

  // Restores the best luma mode
  *mbmi = best_mbmi;
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * num_4x4);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);

  // Performs chroma search
  RD_STATS intra_rd_stats, intra_rd_stats_uv;
  av1_init_rd_stats(&intra_rd_stats);
  av1_init_rd_stats(&intra_rd_stats_uv);
  const int num_planes = av1_num_planes(cm);
  if (num_planes > 1) {
    const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
        intra_search_state, cpi, x, bsize, &intra_rd_stats,
        &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);

    if (!intra_uv_mode_valid) {
      return;
    }
  }

  // Merge the luma and chroma rd stats
  assert(best_mode_cost_y >= 0);
  intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
    // (prediction granularity), so we account for it in the full rate,
    // not the tokenonly rate.
    best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
  }

  const ModeCosts *mode_costs = &x->mode_costs;
  const PREDICTION_MODE mode = mbmi->mode;
  if (num_planes > 1 && xd->is_chroma_ref) {
    const int uv_mode_cost =
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
    intra_rd_stats.rate +=
        intra_rd_stats_uv.rate +
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
  }

  // Intra block is always coded as non-skip
  intra_rd_stats.skip_txfm = 0;
  intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
  // Add in the cost of the no skip flag.
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
  // Calculate the final RD estimate for this mode.
  const int64_t this_rd =
      RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
  // Keep record of best intra rd
  if (this_rd < search_state->best_intra_rd) {
    search_state->best_intra_rd = this_rd;
    intra_search_state->best_intra_mode = mode;
  }

  // An intra result bounds every reference-mode category.
  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] =
        AOMMIN(search_state->best_pred_rd[i], this_rd);
  }

  intra_rd_stats.rdcost = this_rd;

  // Collect mode stats for multiwinner mode processing
  const int txfm_search_done = 1;
  store_winner_mode_stats(
      &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
      &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
      cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
  if (intra_rd_stats.rdcost < search_state->best_rd) {
    update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
                        &best_intra_rd_stats_y, &intra_rd_stats_uv,
                        best_mode_enum, x, txfm_search_done);
  }
}
5249
5250 #if !CONFIG_REALTIME_ONLY
// Prepare inter_cost and intra_cost from TPL stats, which are used as ML
// features in intra mode pruning.
static AOM_INLINE void calculate_cost_from_tpl_data(
    const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
    int mi_col, int64_t *inter_cost, int64_t *intra_cost) {
  const AV1_COMMON *const cm = &cpi->common;
  // Only consider full SB.
  const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
  const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
  // Number of TPL units covering one full superblock.
  const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
                  (block_size_high[sb_size] / tpl_bsize_1d);
  SuperBlockEnc *sb_enc = &x->sb_enc;
  // Proceed only when TPL stats are present for the whole superblock.
  if (sb_enc->tpl_data_count == len) {
    const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
    const int tpl_stride = sb_enc->tpl_stride;
    const int tplw = mi_size_wide[tpl_bsize];
    const int tplh = mi_size_high[tpl_bsize];
    // Current block size measured in whole TPL units.
    const int nw = mi_size_wide[bsize] / tplw;
    const int nh = mi_size_high[bsize] / tplh;
    if (nw >= 1 && nh >= 1) {
      // Offset of this block within its superblock, in mi units, mapped to
      // the starting index in the per-SB TPL cost arrays.
      const int of_h = mi_row % mi_size_high[sb_size];
      const int of_w = mi_col % mi_size_wide[sb_size];
      const int start = of_h / tplh * tpl_stride + of_w / tplw;

      // Accumulate, then average, the TPL inter/intra costs over the block.
      for (int k = 0; k < nh; k++) {
        for (int l = 0; l < nw; l++) {
          *inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
          *intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
        }
      }
      *inter_cost /= nw * nh;
      *intra_cost /= nw * nh;
    }
  }
}
5286 #endif // !CONFIG_REALTIME_ONLY
5287
// When the speed feature skip_intra_in_interframe > 0, enable ML model to prune
// intra mode search.
static AOM_INLINE void skip_intra_modes_in_interframe(
    AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize,
    InterModeSearchState *search_state, int64_t inter_cost, int64_t intra_cost,
    int skip_intra_in_interframe) {
  MACROBLOCKD *const xd = &x->e_mbd;
  // Prune intra search based on best inter mode being transform skip.
  if ((skip_intra_in_interframe >= 2) && search_state->best_mbmode.skip_txfm) {
    const int qindex_thresh[2] = { 200, MAXQ };
    const int ind = (skip_intra_in_interframe >= 3) ? 1 : 0;
    if (!have_newmv_in_inter_mode(search_state->best_mbmode.mode) &&
        (x->qindex <= qindex_thresh[ind])) {
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    } else if ((skip_intra_in_interframe >= 4) &&
               (inter_cost < 0 || intra_cost < 0)) {
      // Negative costs presumably mean TPL stats were unavailable for this
      // block (TODO confirm against calculate_cost_from_tpl_data callers); at
      // the most aggressive levels, skip intra without consulting the model.
      search_state->intra_search_state.skip_intra_modes = 1;
      return;
    }
  }
  // Use ML model to prune intra search.
  if (inter_cost >= 0 && intra_cost >= 0) {
    // Pick the model trained for the closest resolution class.
    const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
                                     ? &av1_intrap_nn_config
                                     : &av1_intrap_hd_nn_config;
    float nn_features[6];
    float scores[2] = { 0.0f };

    nn_features[0] = (float)search_state->best_mbmode.skip_txfm;
    nn_features[1] = (float)mi_size_wide_log2[bsize];
    nn_features[2] = (float)mi_size_high_log2[bsize];
    nn_features[3] = (float)intra_cost;
    nn_features[4] = (float)inter_cost;
    const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
    const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
    nn_features[5] = (float)(ac_q_max / ac_q);

    av1_nn_predict(nn_features, nn_config, 1, scores);

    // For two parameters, the max prob returned from av1_nn_softmax equals
    // 1.0 / (1.0 + e^(-|diff_score|)). Here use scores directly to avoid the
    // calling of av1_nn_softmax.
    const float thresh[5] = { 1.4f, 1.4f, 1.4f, 1.4f, 1.4f };
    assert(skip_intra_in_interframe <= 5);
    if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) {
      search_state->intra_search_state.skip_intra_modes = 1;
    }
  }
}
5338
5339 // TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb.
// Top-level inter-frame mode decision: iterates over all candidate inter
// modes (and, afterwards, intra/palette/skip-mode candidates), evaluates
// each via handle_inter_mode(), and returns the best mode's RD stats in
// 'rd_cost' with the winning coding context stored in 'ctx' and xd->mi[0].
// On failure (no mode beats 'best_rd_so_far'), rd_cost->rate is INT_MAX.
void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
                            struct macroblock *x, struct RD_STATS *rd_cost,
                            BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
                            int64_t best_rd_so_far) {
  AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  const int num_planes = av1_num_planes(cm);
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int i;
  const ModeCosts *mode_costs = &x->mode_costs;
  const int *comp_inter_cost =
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];

  // Accumulates the best mode/RD found across the whole search.
  InterModeSearchState search_state;
  init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
  INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
  };
  // Scratch arguments shared across all handle_inter_mode() calls below.
  HandleInterModeArgs args = { { NULL },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
                               { NULL },
                               { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                 MAX_SB_SIZE >> 1 },
                               NULL,
                               NULL,
                               NULL,
                               search_state.modelled_rd,
                               INT_MAX,
                               INT_MAX,
                               search_state.simple_rd,
                               0,
                               interintra_modes,
                               { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
                               0,
                               -1,
                               -1,
                               -1,
                               { 0 },
                               { 0 } };
  for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
  // Indicates the appropriate number of simple translation winner modes for
  // exhaustive motion mode evaluation
  const int max_winner_motion_mode_cand =
      num_winner_motion_modes[cpi->sf.winner_mode_sf
                                  .motion_mode_for_winner_cand];
  assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
  motion_mode_candidate motion_mode_cand;
  motion_mode_best_st_candidate best_motion_mode_cands;
  // Initializing the number of motion mode candidates to zero.
  best_motion_mode_cands.num_motion_mode_cand = 0;
  for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
    best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;

  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;

  av1_invalid_rd_stats(rd_cost);

  // Invalidate cached warp-motion sample counts for all references.
  for (i = 0; i < REF_FRAMES; ++i) {
    x->warp_sample_info[i].num = -1;
  }

  // Ref frames that are selected by square partition blocks.
  int picked_ref_frames_mask = 0;
  if (cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions &&
      mbmi->partition != PARTITION_NONE) {
    // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
    // partition blocks. prune_ref_frame_for_rect_partitions >=2
    // implies prune for vert, horiz and extended partition blocks.
    if ((mbmi->partition != PARTITION_VERT &&
         mbmi->partition != PARTITION_HORZ) ||
        cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
      picked_ref_frames_mask =
          fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size);
    }
  }

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, set_params_rd_pick_inter_mode_time);
#endif
  // Skip ref frames that were never selected by square blocks.
  const int skip_ref_frame_mask =
      picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
  mode_skip_mask_t mode_skip_mask;
  unsigned int ref_costs_single[REF_FRAMES];
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
  // init params, set frame modes, speed features
  set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
                                skip_ref_frame_mask, ref_costs_single,
                                ref_costs_comp, yv12_mb);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, set_params_rd_pick_inter_mode_time);
#endif

  int64_t best_est_rd = INT64_MAX;
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
  // If do_tx_search is 0, only estimated RD should be computed.
  // If do_tx_search is 1, all modes have TX search performed.
  const int do_tx_search =
      !((cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
        (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 &&
         num_pels_log2_lookup[bsize] > 8) ||
        cpi->sf.rt_sf.force_tx_search_off);
  InterModesInfo *inter_modes_info = x->inter_modes_info;
  inter_modes_info->num = 0;

  // Temporary buffers used by handle_inter_mode().
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);

  // The best RD found for the reference frame, among single reference modes.
  // Note that the 0-th element will contain a cut-off that is later used
  // to determine if we should skip a compound mode.
  int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
                                       INT64_MAX, INT64_MAX, INT64_MAX,
                                       INT64_MAX, INT64_MAX };

  // Prepared stats used later to check if we could skip intra mode eval.
  // Negative values mean "not available".
  int64_t inter_cost = -1;
  int64_t intra_cost = -1;
  // Need to tweak the threshold for hdres speed 0 & 1.
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;

  // Obtain the relevant tpl stats for pruning inter modes
  PruneInfoFromTpl inter_cost_info_from_tpl;
#if !CONFIG_REALTIME_ONLY
  if (cpi->sf.inter_sf.prune_inter_modes_based_on_tpl) {
    // x->tpl_keep_ref_frame[id] = 1 => no pruning in
    // prune_ref_by_selective_ref_frame()
    // x->tpl_keep_ref_frame[id] = 0 => ref frame can be pruned in
    // prune_ref_by_selective_ref_frame()
    // Populating valid_refs[idx] = 1 ensures that
    // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
    // pruned ref frame.
    int valid_refs[INTER_REFS_PER_FRAME];
    for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
      const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
      valid_refs[frame - 1] =
          x->tpl_keep_ref_frame[frame] ||
          !prune_ref_by_selective_ref_frame(
              cpi, x, refs, cm->cur_frame->ref_display_order_hint);
    }
    av1_zero(inter_cost_info_from_tpl);
    get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
                              &inter_cost_info_from_tpl);
  }

  const int do_pruning =
      (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
  if (do_pruning && sf->intra_sf.skip_intra_in_interframe &&
      cpi->oxcf.algo_cfg.enable_tpl_model)
    calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost,
                                 &intra_cost);
#endif  // !CONFIG_REALTIME_ONLY

  // Initialize best mode stats for winner mode processing
  zero_winner_mode_stats(bsize, MAX_WINNER_MODE_COUNT_INTER,
                         x->winner_mode_stats);
  x->winner_mode_count = 0;
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
                          NULL, bsize, best_rd_so_far,
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);

  int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
  if (sf->inter_sf.prune_inter_modes_if_skippable) {
    // Higher multiplication factor values for lower quantizers.
    mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
  }

  // Initialize arguments for mode loop speed features
  InterModeSFArgs sf_args = { &args.skip_motion_mode,
                              &mode_skip_mask,
                              &search_state,
                              skip_ref_frame_mask,
                              0,
                              mode_thresh_mul_fact,
                              0,
                              0 };
  int64_t best_inter_yrd = INT64_MAX;

  // This is the main loop of this function. It loops over all possible inter
  // modes and calls handle_inter_mode() to compute the RD for each.
  // Here midx is just an iterator index that should not be used by itself
  // except to keep track of the number of modes searched. It should be used
  // with av1_default_mode_order to get the enum that defines the mode, which
  // can be used with av1_mode_defs to get the prediction mode and the ref
  // frames.
  // TODO(yunqing, any): Setting mode_start and mode_end outside for-loop brings
  // good speedup for real time case. If we decide to use compound mode in real
  // time, maybe we can modify av1_default_mode_order table.
  THR_MODES mode_start = THR_INTER_MODE_START;
  THR_MODES mode_end = THR_INTER_MODE_END;
  const CurrentFrame *const current_frame = &cm->current_frame;
  if (current_frame->reference_mode == SINGLE_REFERENCE) {
    mode_start = SINGLE_REF_MODE_START;
    mode_end = SINGLE_REF_MODE_END;
  }

  for (THR_MODES midx = mode_start; midx < mode_end; ++midx) {
    // Get the actual prediction mode we are trying in this iteration
    const THR_MODES mode_enum = av1_default_mode_order[midx];
    const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
    const PREDICTION_MODE this_mode = mode_def->mode;
    const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;

    const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
    const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
    const int is_single_pred =
        ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
    const int comp_pred = second_ref_frame > INTRA_FRAME;

    init_mbmi(mbmi, this_mode, ref_frames, cm);

    txfm_info->skip_txfm = 0;
    sf_args.num_single_modes_processed += is_single_pred;
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, skip_inter_mode_time);
#endif
    // Apply speed features to decide if this inter mode can be skipped
    const int is_skip_inter_mode =
        skip_inter_mode(cpi, x, bsize, ref_frame_rd, midx, &sf_args);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, skip_inter_mode_time);
#endif
    if (is_skip_inter_mode) continue;

    // Select prediction reference frames.
    for (i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
      if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
    }

    // Reset per-mode state that previous iterations may have set.
    mbmi->angle_delta[PLANE_TYPE_Y] = 0;
    mbmi->angle_delta[PLANE_TYPE_UV] = 0;
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
    mbmi->ref_mv_idx = 0;

    const int64_t ref_best_rd = search_state.best_rd;
    RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
    av1_init_rd_stats(&rd_stats);

    const int ref_frame_cost = comp_pred
                                   ? ref_costs_comp[ref_frame][second_ref_frame]
                                   : ref_costs_single[ref_frame];
    const int compmode_cost =
        is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
    const int real_compmode_cost =
        cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
            ? compmode_cost
            : 0;
    // Point to variables that are maintained between loop iterations
    args.single_newmv = search_state.single_newmv;
    args.single_newmv_rate = search_state.single_newmv_rate;
    args.single_newmv_valid = search_state.single_newmv_valid;
    args.single_comp_cost = real_compmode_cost;
    args.ref_frame_cost = ref_frame_cost;

    int64_t skip_rd[2] = { search_state.best_skip_rd[0],
                           search_state.best_skip_rd[1] };
    int64_t this_yrd = INT64_MAX;
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, handle_inter_mode_time);
#endif
    // Evaluate the current mode; INT64_MAX means the mode was not viable.
    int64_t this_rd = handle_inter_mode(
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
        ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
        inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
        &this_yrd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, handle_inter_mode_time);
#endif
    if (sf->inter_sf.prune_comp_search_by_single_result > 0 &&
        is_inter_singleref_mode(this_mode)) {
      collect_single_states(x, &search_state, mbmi);
    }

    if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
        is_inter_singleref_mode(this_mode))
      update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);

    if (this_rd == INT64_MAX) continue;

    if (mbmi->skip_txfm) {
      rd_stats_y.rate = 0;
      rd_stats_uv.rate = 0;
    }

    if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
        this_rd < ref_frame_rd[ref_frame]) {
      ref_frame_rd[ref_frame] = this_rd;
    }

    // Did this mode help, i.e., is it the new best mode
    if (this_rd < search_state.best_rd) {
      assert(IMPLIES(comp_pred,
                     cm->current_frame.reference_mode != SINGLE_REFERENCE));
      search_state.best_pred_sse = x->pred_sse[ref_frame];
      best_inter_yrd = this_yrd;
      update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                          &rd_stats_uv, mode_enum, x, do_tx_search);
      if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
      search_state.best_skip_rd[1] = skip_rd[1];
    }
    if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
      // Add this mode to motion mode candidate list for motion mode search
      // if using motion_mode_for_winner_cand speed feature
      handle_winner_cand(mbmi, &best_motion_mode_cands,
                         max_winner_motion_mode_cand, this_rd,
                         &motion_mode_cand, args.skip_motion_mode);
    }

    /* keep record of best compound/single-only prediction */
    record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
                         x->rdmult, &search_state, compmode_cost);
  }

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
#endif
  if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
    // For the single ref winner candidates, evaluate other motion modes (non
    // simple translation).
    evaluate_motion_mode_for_winner_candidates(
        cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
        &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
        &search_state, &best_inter_yrd);
  }
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
#endif

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, do_tx_search_time);
#endif
  if (do_tx_search != 1) {
    // A full tx search has not yet been done, do tx search for
    // top mode candidates
    tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
                                    yv12_mb, mi_row, mi_col, &search_state,
                                    rd_cost, ctx, &best_inter_yrd);
  }
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, do_tx_search_time);
#endif

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, handle_intra_mode_time);
#endif
  // Gate intra mode evaluation if best of inter is skip except when source
  // variance is extremely low
  const unsigned int src_var_thresh_intra_skip = 1;
  const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe;
  if (skip_intra_in_interframe &&
      (x->source_variance > src_var_thresh_intra_skip))
    skip_intra_modes_in_interframe(cm, x, bsize, &search_state, inter_cost,
                                   intra_cost, skip_intra_in_interframe);

  const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
  search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
                                   &sf_args, intra_ref_frame_cost,
                                   best_inter_yrd);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, handle_intra_mode_time);
#endif

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, refine_winner_mode_tx_time);
#endif
  int winner_mode_count =
      cpi->sf.winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
  // In effect only when fast tx search speed features are enabled.
  refine_winner_mode_tx(
      cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
      &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
      search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, refine_winner_mode_tx_time);
#endif

  // Initialize default mode evaluation params
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);

  // Only try palette mode when the best mode so far is an intra mode.
  const int try_palette =
      cpi->oxcf.tool_cfg.enable_palette &&
      av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
      !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
  RD_STATS this_rd_cost;
  int this_skippable = 0;
  if (try_palette) {
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, av1_search_palette_mode_time);
#endif
    this_skippable = av1_search_palette_mode(
        &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
        ctx, &this_rd_cost, search_state.best_rd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, av1_search_palette_mode_time);
#endif
    // Adopt palette if it beats the best RD found so far.
    if (this_rd_cost.rdcost < search_state.best_rd) {
      search_state.best_mode_index = THR_DC;
      mbmi->mv[0].as_int = 0;
      rd_cost->rate = this_rd_cost.rate;
      rd_cost->dist = this_rd_cost.dist;
      rd_cost->rdcost = this_rd_cost.rdcost;
      search_state.best_rd = rd_cost->rdcost;
      search_state.best_mbmode = *mbmi;
      search_state.best_skip2 = 0;
      search_state.best_mode_skippable = this_skippable;
      memcpy(ctx->blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
      av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
    }
  }

  // Consider the skip-mode (frame-level derived compound) candidate last.
  search_state.best_mbmode.skip_mode = 0;
  if (cm->current_frame.skip_mode_info.skip_mode_flag &&
      is_comp_ref_allowed(bsize)) {
    const struct segmentation *const seg = &cm->seg;
    unsigned char segment_id = mbmi->segment_id;
    if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
      rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
    }
  }

  // Make sure that the ref_mv_idx is only nonzero when we're
  // using a mode which can support ref_mv_idx
  if (search_state.best_mbmode.ref_mv_idx != 0 &&
      !(search_state.best_mbmode.mode == NEWMV ||
        search_state.best_mbmode.mode == NEW_NEWMV ||
        have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
    search_state.best_mbmode.ref_mv_idx = 0;
  }

  // No mode beat the caller's threshold: signal failure via INT_MAX rate.
  if (search_state.best_mode_index == THR_INVALID ||
      search_state.best_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  const InterpFilter interp_filter = features->interp_filter;
  assert((interp_filter == SWITCHABLE) ||
         (interp_filter ==
          search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
         !is_inter_block(&search_state.best_mbmode));
  assert((interp_filter == SWITCHABLE) ||
         (interp_filter ==
          search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
         !is_inter_block(&search_state.best_mbmode));

  if (!cpi->rc.is_src_frame_alt_ref && cpi->sf.inter_sf.adaptive_rd_thresh) {
    av1_update_rd_thresh_fact(
        cm, x->thresh_freq_fact, sf->inter_sf.adaptive_rd_thresh, bsize,
        search_state.best_mode_index, mode_start, mode_end, THR_DC, MAX_MODES);
  }

  // macroblock modes
  *mbmi = search_state.best_mbmode;
  txfm_info->skip_txfm |= search_state.best_skip2;

  // Note: this section is needed since the mode may have been forced to
  // GLOBALMV by the all-zero mode handling of ref-mv.
  if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
    // Correct the interp filters for GLOBALMV
    if (is_nontrans_global_motion(xd, xd->mi[0])) {
      int_interpfilters filters =
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
      assert(mbmi->interp_filters.as_int == filters.as_int);
      (void)filters;
    }
  }

  // Record per-reference-mode RD differences relative to the overall best.
  for (i = 0; i < REFERENCE_MODES; ++i) {
    if (search_state.best_pred_rd[i] == INT64_MAX) {
      search_state.best_pred_diff[i] = INT_MIN;
    } else {
      search_state.best_pred_diff[i] =
          search_state.best_rd - search_state.best_pred_rd[i];
    }
  }

  txfm_info->skip_txfm |= search_state.best_mode_skippable;

  assert(search_state.best_mode_index != THR_INVALID);

#if CONFIG_INTERNAL_STATS
  store_coding_context(x, ctx, search_state.best_mode_index,
                       search_state.best_pred_diff,
                       search_state.best_mode_skippable);
#else
  store_coding_context(x, ctx, search_state.best_pred_diff,
                       search_state.best_mode_skippable);
#endif  // CONFIG_INTERNAL_STATS

  if (mbmi->palette_mode_info.palette_size[1] > 0) {
    assert(try_palette);
    av1_restore_uv_color_map(cpi, x);
  }
}
5845
// Fast mode decision used when the block's segment has SEG_LVL_SKIP active
// (asserted below): forces GLOBALMV/LAST_FRAME with skipped transform, only
// searching the interpolation filter, and returns the rate cost of signaling
// this choice in 'rd_cost' (rate = INT_MAX if it does not beat
// 'best_rd_so_far').
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
                                        TileDataEnc *tile_data, MACROBLOCK *x,
                                        int mi_row, int mi_col,
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = 0;
  int i;
  int64_t best_pred_diff[REFERENCE_MODES];
  unsigned int ref_costs_single[REF_FRAMES];
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
  const ModeCosts *mode_costs = &x->mode_costs;
  const int *comp_inter_cost =
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
  InterpFilter best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  // Distortion is zero: the skip segment codes no residual.
  const int64_t distortion2 = 0;
  (void)mi_row;
  (void)mi_col;
  (void)tile_data;

  av1_collect_neighbors_ref_counts(xd);

  estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  // Force the fixed mode: GLOBALMV, simple translation, DC chroma, no
  // palette/filter-intra, with the segment-specified (or LAST) ref frame.
  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->mode = GLOBALMV;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->uv_mode = UV_DC_PRED;
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  else
    mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  // The MV is fully determined by the ref frame's global motion parameters.
  mbmi->mv[0].as_int =
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
                           features->allow_high_precision_mv, bsize, mi_col,
                           mi_row, features->cur_frame_force_integer_mv)
          .as_int;
  mbmi->tx_size = max_txsize_lookup[bsize];
  x->txfm_search_info.skip_txfm = 1;

  mbmi->ref_mv_idx = 0;

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  av1_count_overlappable_neighbors(cm, xd);
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
    // Select the samples according to motion vector difference
    if (mbmi->num_proj_ref > 1) {
      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
                                             mbmi->num_proj_ref, bsize);
    }
  }

  const InterpFilter interp_filter = features->interp_filter;
  set_default_interp_filters(mbmi, interp_filter);

  if (interp_filter != SWITCHABLE) {
    best_filter = interp_filter;
  } else {
    best_filter = EIGHTTAP_REGULAR;
    // With switchable filters, pick the one with the lowest signaling rate
    // (distortion is zero, so rate alone decides).
    if (av1_is_interp_needed(xd)) {
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
        rs = av1_get_switchable_rate(x, xd, interp_filter,
                                     cm->seq_params->enable_dual_filter);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = mbmi->interp_filters.as_filters.y_filter;
        }
      }
    }
  }
  // Set the appropriate filter
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
  rate2 += av1_get_switchable_rate(x, xd, interp_filter,
                                   cm->seq_params->enable_dual_filter);

  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
    rate2 += comp_inter_cost[comp_pred];

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((interp_filter == SWITCHABLE) ||
         (interp_filter == mbmi->interp_filters.as_filters.y_filter));

  if (cpi->sf.inter_sf.adaptive_rd_thresh) {
    av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
                              cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
                              THR_GLOBALMV, THR_INTER_MODE_START,
                              THR_INTER_MODE_END, THR_DC, MAX_MODES);
  }

  av1_zero(best_pred_diff);

#if CONFIG_INTERNAL_STATS
  store_coding_context(x, ctx, THR_GLOBALMV, best_pred_diff, 0);
#else
  store_coding_context(x, ctx, best_pred_diff, 0);
#endif  // CONFIG_INTERNAL_STATS
}
5980
/*!\cond */
// Context passed to the per-neighbor callbacks below while computing the
// OBMC weighted-source ('wsrc') and mask buffers.
struct calc_target_weighted_pred_ctxt {
  const OBMCBuffer *obmc_buffer;  // holds the wsrc/mask output buffers
  const uint8_t *tmp;             // neighbor predictor samples (8-bit, or
                                  // CONVERT_TO_SHORTPTR'd for high bitdepth)
  int tmp_stride;                 // stride of 'tmp' in samples
  int overlap;                    // OBMC overlap extent in pixels
};
/*!\endcond */
5989
calc_target_weighted_pred_above(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * nb_mi,void * fun_ctxt,const int num_planes)5990 static INLINE void calc_target_weighted_pred_above(
5991 MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
5992 int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
5993 (void)nb_mi;
5994 (void)num_planes;
5995 (void)rel_mi_row;
5996 (void)dir;
5997
5998 struct calc_target_weighted_pred_ctxt *ctxt =
5999 (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6000
6001 const int bw = xd->width << MI_SIZE_LOG2;
6002 const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6003
6004 int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
6005 int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
6006 const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
6007 const int is_hbd = is_cur_buf_hbd(xd);
6008
6009 if (!is_hbd) {
6010 for (int row = 0; row < ctxt->overlap; ++row) {
6011 const uint8_t m0 = mask1d[row];
6012 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6013 for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6014 wsrc[col] = m1 * tmp[col];
6015 mask[col] = m0;
6016 }
6017 wsrc += bw;
6018 mask += bw;
6019 tmp += ctxt->tmp_stride;
6020 }
6021 } else {
6022 const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6023
6024 for (int row = 0; row < ctxt->overlap; ++row) {
6025 const uint8_t m0 = mask1d[row];
6026 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6027 for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6028 wsrc[col] = m1 * tmp16[col];
6029 mask[col] = m0;
6030 }
6031 wsrc += bw;
6032 mask += bw;
6033 tmp16 += ctxt->tmp_stride;
6034 }
6035 }
6036 }
6037
calc_target_weighted_pred_left(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * nb_mi,void * fun_ctxt,const int num_planes)6038 static INLINE void calc_target_weighted_pred_left(
6039 MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6040 int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6041 (void)nb_mi;
6042 (void)num_planes;
6043 (void)rel_mi_col;
6044 (void)dir;
6045
6046 struct calc_target_weighted_pred_ctxt *ctxt =
6047 (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6048
6049 const int bw = xd->width << MI_SIZE_LOG2;
6050 const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6051
6052 int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
6053 int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
6054 const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
6055 const int is_hbd = is_cur_buf_hbd(xd);
6056
6057 if (!is_hbd) {
6058 for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6059 for (int col = 0; col < ctxt->overlap; ++col) {
6060 const uint8_t m0 = mask1d[col];
6061 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6062 wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6063 (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6064 mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6065 }
6066 wsrc += bw;
6067 mask += bw;
6068 tmp += ctxt->tmp_stride;
6069 }
6070 } else {
6071 const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6072
6073 for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6074 for (int col = 0; col < ctxt->overlap; ++col) {
6075 const uint8_t m0 = mask1d[col];
6076 const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6077 wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6078 (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6079 mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6080 }
6081 wsrc += bw;
6082 mask += bw;
6083 tmp16 += ctxt->tmp_stride;
6084 }
6085 }
6086 }
6087
6088 // This function has a structure similar to av1_build_obmc_inter_prediction
6089 //
6090 // The OBMC predictor is computed as:
6091 //
6092 // PObmc(x,y) =
6093 // AOM_BLEND_A64(Mh(x),
6094 // AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
6095 // PLeft(x, y))
6096 //
6097 // Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
6098 // rounding, this can be written as:
6099 //
6100 // AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
6101 // Mh(x) * Mv(y) * P(x,y) +
6102 // Mh(x) * Cv(y) * Pabove(x,y) +
6103 // AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6104 //
6105 // Where :
6106 //
6107 // Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
6108 // Ch(y) = AOM_BLEND_A64_MAX_ALPHA - Mh(y)
6109 //
6110 // This function computes 'wsrc' and 'mask' as:
6111 //
//  wsrc(x, y) =
//    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
//      (Mh(x) * Cv(y) * Pabove(x,y) +
//       AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y))
6116 //
6117 // mask(x, y) = Mh(x) * Mv(y)
6118 //
6119 // These can then be used to efficiently approximate the error for any
6120 // predictor P in the context of the provided neighbouring predictors by
6121 // computing:
6122 //
6123 // error(x, y) =
6124 // wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
6125 //
// Computes the weighted-source ('wsrc') and 'mask' buffers used to
// approximate the OBMC prediction error during motion search; see the
// derivation in the comment block above this function.
static AOM_INLINE void calc_target_weighted_pred(
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
    const uint8_t *above, int above_stride, const uint8_t *left,
    int left_stride) {
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
  const int bw = xd->width << MI_SIZE_LOG2;
  const int bh = xd->height << MI_SIZE_LOG2;
  const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
  int32_t *mask_buf = obmc_buffer->mask;
  int32_t *wsrc_buf = obmc_buffer->wsrc;

  const int is_hbd = is_cur_buf_hbd(xd);
  // The source is scaled by AOM_BLEND_A64_MAX_ALPHA**2 below so that it is at
  // the same fixed-point scale as the accumulated neighbour predictions.
  const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;

  // plane 0 should not be sub-sampled
  assert(xd->plane[0].subsampling_x == 0);
  assert(xd->plane[0].subsampling_y == 0);

  av1_zero_array(wsrc_buf, bw * bh);
  for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;

  // handle above row
  if (xd->up_available) {
    const int overlap =
        AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
                                                   above_stride, overlap };
    foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
                                  max_neighbor_obmc[mi_size_wide_log2[bsize]],
                                  calc_target_weighted_pred_above, &ctxt);
  }

  // Scale up by AOM_BLEND_A64_MAX_ALPHA before blending in the left column so
  // that the above and left contributions end up at the same fixed-point
  // scale; the ordering of these three steps is significant.
  for (int i = 0; i < bw * bh; ++i) {
    wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
    mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
  }

  // handle left column
  if (xd->left_available) {
    const int overlap =
        AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
                                                   left_stride, overlap };
    foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
                                 max_neighbor_obmc[mi_size_high_log2[bsize]],
                                 calc_target_weighted_pred_left, &ctxt);
  }

  // Finalize: wsrc = src * MAX_ALPHA**2 - (accumulated neighbour preds),
  // in the low or high bit-depth domain as appropriate.
  if (!is_hbd) {
    const uint8_t *src = x->plane[0].src.buf;

    for (int row = 0; row < bh; ++row) {
      for (int col = 0; col < bw; ++col) {
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
      }
      wsrc_buf += bw;
      src += x->plane[0].src.stride;
    }
  } else {
    const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);

    for (int row = 0; row < bh; ++row) {
      for (int col = 0; col < bw; ++col) {
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
      }
      wsrc_buf += bw;
      src += x->plane[0].src.stride;
    }
  }
}
6196
/* Use standard 3x3 Sobel matrix. Macro so it can be used for either high or
   low bit-depth arrays. (i, j) is the center pixel; callers must keep it at
   least one pixel away from every buffer edge. */
#define SOBEL_X(src, stride, i, j)                       \
  ((src)[((i)-1) + (stride) * ((j)-1)] -                 \
   (src)[((i) + 1) + (stride) * ((j)-1)] + /* NOLINT */ \
   2 * (src)[((i)-1) + (stride) * (j)] -   /* NOLINT */ \
   2 * (src)[((i) + 1) + (stride) * (j)] + /* NOLINT */ \
   (src)[((i)-1) + (stride) * ((j) + 1)] - /* NOLINT */ \
   (src)[((i) + 1) + (stride) * ((j) + 1)]) /* NOLINT */
#define SOBEL_Y(src, stride, i, j)                       \
  ((src)[((i)-1) + (stride) * ((j)-1)] +                 \
   2 * (src)[(i) + (stride) * ((j)-1)] +   /* NOLINT */ \
   (src)[((i) + 1) + (stride) * ((j)-1)] - /* NOLINT */ \
   (src)[((i)-1) + (stride) * ((j) + 1)] - /* NOLINT */ \
   2 * (src)[(i) + (stride) * ((j) + 1)] - /* NOLINT */ \
   (src)[((i) + 1) + (stride) * ((j) + 1)]) /* NOLINT */
6213
av1_sobel(const uint8_t * input,int stride,int i,int j,bool high_bd)6214 sobel_xy av1_sobel(const uint8_t *input, int stride, int i, int j,
6215 bool high_bd) {
6216 int16_t s_x;
6217 int16_t s_y;
6218 if (high_bd) {
6219 const uint16_t *src = CONVERT_TO_SHORTPTR(input);
6220 s_x = SOBEL_X(src, stride, i, j);
6221 s_y = SOBEL_Y(src, stride, i, j);
6222 } else {
6223 s_x = SOBEL_X(input, stride, i, j);
6224 s_y = SOBEL_Y(input, stride, i, j);
6225 }
6226 sobel_xy r = { .x = s_x, .y = s_y };
6227 return r;
6228 }
6229
// 8-tap Gaussian convolution filter with sigma = 1.3, sums to 128,
// all coefficients must be even.
DECLARE_ALIGNED(16, static const int16_t, gauss_filter[8]) = { 2, 12, 30, 40,
                                                               30, 12, 2, 0 };
6234
av1_gaussian_blur(const uint8_t * src,int src_stride,int w,int h,uint8_t * dst,bool high_bd,int bd)6235 void av1_gaussian_blur(const uint8_t *src, int src_stride, int w, int h,
6236 uint8_t *dst, bool high_bd, int bd) {
6237 ConvolveParams conv_params = get_conv_params(0, 0, bd);
6238 InterpFilterParams filter = { .filter_ptr = gauss_filter,
6239 .taps = 8,
6240 .interp_filter = EIGHTTAP_REGULAR };
6241 // Requirements from the vector-optimized implementations.
6242 assert(h % 4 == 0);
6243 assert(w % 8 == 0);
6244 // Because we use an eight tap filter, the stride should be at least 7 + w.
6245 assert(src_stride >= w + 7);
6246 #if CONFIG_AV1_HIGHBITDEPTH
6247 if (high_bd) {
6248 av1_highbd_convolve_2d_sr(CONVERT_TO_SHORTPTR(src), src_stride,
6249 CONVERT_TO_SHORTPTR(dst), w, w, h, &filter,
6250 &filter, 0, 0, &conv_params, bd);
6251 } else {
6252 av1_convolve_2d_sr(src, src_stride, dst, w, w, h, &filter, &filter, 0, 0,
6253 &conv_params);
6254 }
6255 #else
6256 (void)high_bd;
6257 av1_convolve_2d_sr(src, src_stride, dst, w, w, h, &filter, &filter, 0, 0,
6258 &conv_params);
6259 #endif
6260 }
6261
edge_probability(const uint8_t * input,int w,int h,bool high_bd,int bd)6262 static EdgeInfo edge_probability(const uint8_t *input, int w, int h,
6263 bool high_bd, int bd) {
6264 // The probability of an edge in the whole image is the same as the highest
6265 // probability of an edge for any individual pixel. Use Sobel as the metric
6266 // for finding an edge.
6267 uint16_t highest = 0;
6268 uint16_t highest_x = 0;
6269 uint16_t highest_y = 0;
6270 // Ignore the 1 pixel border around the image for the computation.
6271 for (int j = 1; j < h - 1; ++j) {
6272 for (int i = 1; i < w - 1; ++i) {
6273 sobel_xy g = av1_sobel(input, w, i, j, high_bd);
6274 // Scale down to 8-bit to get same output regardless of bit depth.
6275 int16_t g_x = g.x >> (bd - 8);
6276 int16_t g_y = g.y >> (bd - 8);
6277 uint16_t magnitude = (uint16_t)sqrt(g_x * g_x + g_y * g_y);
6278 highest = AOMMAX(highest, magnitude);
6279 highest_x = AOMMAX(highest_x, g_x);
6280 highest_y = AOMMAX(highest_y, g_y);
6281 }
6282 }
6283 EdgeInfo ei = { .magnitude = highest, .x = highest_x, .y = highest_y };
6284 return ei;
6285 }
6286
6287 /* Uses most of the Canny edge detection algorithm to find if there are any
6288 * edges in the image.
6289 */
av1_edge_exists(const uint8_t * src,int src_stride,int w,int h,bool high_bd,int bd)6290 EdgeInfo av1_edge_exists(const uint8_t *src, int src_stride, int w, int h,
6291 bool high_bd, int bd) {
6292 if (w < 3 || h < 3) {
6293 EdgeInfo n = { .magnitude = 0, .x = 0, .y = 0 };
6294 return n;
6295 }
6296 uint8_t *blurred;
6297 if (high_bd) {
6298 blurred = CONVERT_TO_BYTEPTR(aom_memalign(32, sizeof(uint16_t) * w * h));
6299 } else {
6300 blurred = (uint8_t *)aom_memalign(32, sizeof(uint8_t) * w * h);
6301 }
6302 av1_gaussian_blur(src, src_stride, w, h, blurred, high_bd, bd);
6303 // Skip the non-maximum suppression step in Canny edge detection. We just
6304 // want a probability of an edge existing in the buffer, which is determined
6305 // by the strongest edge in it -- we don't need to eliminate the weaker
6306 // edges. Use Sobel for the edge detection.
6307 EdgeInfo prob = edge_probability(blurred, w, h, high_bd, bd);
6308 if (high_bd) {
6309 aom_free(CONVERT_TO_SHORTPTR(blurred));
6310 } else {
6311 aom_free(blurred);
6312 }
6313 return prob;
6314 }
6315