1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 #include <math.h>
14 #include <stdbool.h>
15 
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/av1_rtcd.h"
19 
20 #include "aom_dsp/aom_dsp_common.h"
21 #include "aom_dsp/blend.h"
22 #include "aom_mem/aom_mem.h"
23 #include "aom_ports/aom_timer.h"
24 #include "aom_ports/mem.h"
25 #include "aom_ports/system_state.h"
26 
27 #include "av1/common/av1_common_int.h"
28 #include "av1/common/cfl.h"
29 #include "av1/common/blockd.h"
30 #include "av1/common/common.h"
31 #include "av1/common/common_data.h"
32 #include "av1/common/entropy.h"
33 #include "av1/common/entropymode.h"
34 #include "av1/common/idct.h"
35 #include "av1/common/mvref_common.h"
36 #include "av1/common/obmc.h"
37 #include "av1/common/pred_common.h"
38 #include "av1/common/quant_common.h"
39 #include "av1/common/reconinter.h"
40 #include "av1/common/reconintra.h"
41 #include "av1/common/scan.h"
42 #include "av1/common/seg_common.h"
43 #include "av1/common/txb_common.h"
44 #include "av1/common/warped_motion.h"
45 
46 #include "av1/encoder/aq_variance.h"
47 #include "av1/encoder/av1_quantize.h"
48 #include "av1/encoder/cost.h"
49 #include "av1/encoder/compound_type.h"
50 #include "av1/encoder/encodemb.h"
51 #include "av1/encoder/encodemv.h"
52 #include "av1/encoder/encoder.h"
53 #include "av1/encoder/encodetxb.h"
54 #include "av1/encoder/hybrid_fwd_txfm.h"
55 #include "av1/encoder/interp_search.h"
56 #include "av1/encoder/intra_mode_search.h"
57 #include "av1/encoder/intra_mode_search_utils.h"
58 #include "av1/encoder/mcomp.h"
59 #include "av1/encoder/ml.h"
60 #include "av1/encoder/mode_prune_model_weights.h"
61 #include "av1/encoder/model_rd.h"
62 #include "av1/encoder/motion_search_facade.h"
63 #include "av1/encoder/palette.h"
64 #include "av1/encoder/pustats.h"
65 #include "av1/encoder/random.h"
66 #include "av1/encoder/ratectrl.h"
67 #include "av1/encoder/rd.h"
68 #include "av1/encoder/rdopt.h"
69 #include "av1/encoder/reconinter_enc.h"
70 #include "av1/encoder/tokenize.h"
71 #include "av1/encoder/tpl_model.h"
72 #include "av1/encoder/tx_search.h"
73 
// Index of the last single-reference NEWMV entry in the mode order table.
#define LAST_NEW_MV_INDEX 6

// Mode_threshold multiplication factor table for prune_inter_modes_if_skippable
// The values are kept in Q12 format and equation used to derive is
// (2.5 - ((float)x->qindex / MAXQ) * 1.5)
// i.e. the factor decreases linearly from 2.5 (qindex 0) to 1.0 (qindex MAXQ),
// indexed by the block's qindex.
#define MODE_THRESH_QBITS 12
static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
  10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
  9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
  9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
  9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
  9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
  8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
  8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
  8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
  8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
  7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
  7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
  7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
  7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
  6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
  6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
  6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
  5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
  5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
  5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
  5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
  4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
  4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
  4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
  4144,  4120,  4096
};
106 
// Default evaluation order of all inter and intra modes in the RD search:
// single-reference modes first (grouped by prediction mode, one entry per
// reference frame), then compound modes (grouped by reference pair), and
// finally the intra modes.
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
  // Single-reference NEARESTMV, one entry per reference frame.
  THR_NEARESTMV,
  THR_NEARESTL2,
  THR_NEARESTL3,
  THR_NEARESTB,
  THR_NEARESTA2,
  THR_NEARESTA,
  THR_NEARESTG,

  // Single-reference NEWMV.
  THR_NEWMV,
  THR_NEWL2,
  THR_NEWL3,
  THR_NEWB,
  THR_NEWA2,
  THR_NEWA,
  THR_NEWG,

  // Single-reference NEARMV.
  THR_NEARMV,
  THR_NEARL2,
  THR_NEARL3,
  THR_NEARB,
  THR_NEARA2,
  THR_NEARA,
  THR_NEARG,

  // Single-reference GLOBALMV.
  THR_GLOBALMV,
  THR_GLOBALL2,
  THR_GLOBALL3,
  THR_GLOBALB,
  THR_GLOBALA2,
  THR_GLOBALA,
  THR_GLOBALG,

  // Compound NEAREST_NEARESTMV, one entry per reference-frame pair.
  THR_COMP_NEAREST_NEARESTLA,
  THR_COMP_NEAREST_NEARESTL2A,
  THR_COMP_NEAREST_NEARESTL3A,
  THR_COMP_NEAREST_NEARESTGA,
  THR_COMP_NEAREST_NEARESTLB,
  THR_COMP_NEAREST_NEARESTL2B,
  THR_COMP_NEAREST_NEARESTL3B,
  THR_COMP_NEAREST_NEARESTGB,
  THR_COMP_NEAREST_NEARESTLA2,
  THR_COMP_NEAREST_NEARESTL2A2,
  THR_COMP_NEAREST_NEARESTL3A2,
  THR_COMP_NEAREST_NEARESTGA2,
  THR_COMP_NEAREST_NEARESTLL2,
  THR_COMP_NEAREST_NEARESTLL3,
  THR_COMP_NEAREST_NEARESTLG,
  THR_COMP_NEAREST_NEARESTBA,

  // Remaining compound modes, grouped by reference pair: (LAST, BWDREF).
  THR_COMP_NEAR_NEARLB,
  THR_COMP_NEW_NEWLB,
  THR_COMP_NEW_NEARESTLB,
  THR_COMP_NEAREST_NEWLB,
  THR_COMP_NEW_NEARLB,
  THR_COMP_NEAR_NEWLB,
  THR_COMP_GLOBAL_GLOBALLB,

  // (LAST, ALTREF).
  THR_COMP_NEAR_NEARLA,
  THR_COMP_NEW_NEWLA,
  THR_COMP_NEW_NEARESTLA,
  THR_COMP_NEAREST_NEWLA,
  THR_COMP_NEW_NEARLA,
  THR_COMP_NEAR_NEWLA,
  THR_COMP_GLOBAL_GLOBALLA,

  // (LAST2, ALTREF).
  THR_COMP_NEAR_NEARL2A,
  THR_COMP_NEW_NEWL2A,
  THR_COMP_NEW_NEARESTL2A,
  THR_COMP_NEAREST_NEWL2A,
  THR_COMP_NEW_NEARL2A,
  THR_COMP_NEAR_NEWL2A,
  THR_COMP_GLOBAL_GLOBALL2A,

  // (LAST3, ALTREF).
  THR_COMP_NEAR_NEARL3A,
  THR_COMP_NEW_NEWL3A,
  THR_COMP_NEW_NEARESTL3A,
  THR_COMP_NEAREST_NEWL3A,
  THR_COMP_NEW_NEARL3A,
  THR_COMP_NEAR_NEWL3A,
  THR_COMP_GLOBAL_GLOBALL3A,

  // (GOLDEN, ALTREF).
  THR_COMP_NEAR_NEARGA,
  THR_COMP_NEW_NEWGA,
  THR_COMP_NEW_NEARESTGA,
  THR_COMP_NEAREST_NEWGA,
  THR_COMP_NEW_NEARGA,
  THR_COMP_NEAR_NEWGA,
  THR_COMP_GLOBAL_GLOBALGA,

  // (LAST2, BWDREF).
  THR_COMP_NEAR_NEARL2B,
  THR_COMP_NEW_NEWL2B,
  THR_COMP_NEW_NEARESTL2B,
  THR_COMP_NEAREST_NEWL2B,
  THR_COMP_NEW_NEARL2B,
  THR_COMP_NEAR_NEWL2B,
  THR_COMP_GLOBAL_GLOBALL2B,

  // (LAST3, BWDREF).
  THR_COMP_NEAR_NEARL3B,
  THR_COMP_NEW_NEWL3B,
  THR_COMP_NEW_NEARESTL3B,
  THR_COMP_NEAREST_NEWL3B,
  THR_COMP_NEW_NEARL3B,
  THR_COMP_NEAR_NEWL3B,
  THR_COMP_GLOBAL_GLOBALL3B,

  // (GOLDEN, BWDREF).
  THR_COMP_NEAR_NEARGB,
  THR_COMP_NEW_NEWGB,
  THR_COMP_NEW_NEARESTGB,
  THR_COMP_NEAREST_NEWGB,
  THR_COMP_NEW_NEARGB,
  THR_COMP_NEAR_NEWGB,
  THR_COMP_GLOBAL_GLOBALGB,

  // (LAST, ALTREF2).
  THR_COMP_NEAR_NEARLA2,
  THR_COMP_NEW_NEWLA2,
  THR_COMP_NEW_NEARESTLA2,
  THR_COMP_NEAREST_NEWLA2,
  THR_COMP_NEW_NEARLA2,
  THR_COMP_NEAR_NEWLA2,
  THR_COMP_GLOBAL_GLOBALLA2,

  // (LAST2, ALTREF2).
  THR_COMP_NEAR_NEARL2A2,
  THR_COMP_NEW_NEWL2A2,
  THR_COMP_NEW_NEARESTL2A2,
  THR_COMP_NEAREST_NEWL2A2,
  THR_COMP_NEW_NEARL2A2,
  THR_COMP_NEAR_NEWL2A2,
  THR_COMP_GLOBAL_GLOBALL2A2,

  // (LAST3, ALTREF2).
  THR_COMP_NEAR_NEARL3A2,
  THR_COMP_NEW_NEWL3A2,
  THR_COMP_NEW_NEARESTL3A2,
  THR_COMP_NEAREST_NEWL3A2,
  THR_COMP_NEW_NEARL3A2,
  THR_COMP_NEAR_NEWL3A2,
  THR_COMP_GLOBAL_GLOBALL3A2,

  // (GOLDEN, ALTREF2).
  THR_COMP_NEAR_NEARGA2,
  THR_COMP_NEW_NEWGA2,
  THR_COMP_NEW_NEARESTGA2,
  THR_COMP_NEAREST_NEWGA2,
  THR_COMP_NEW_NEARGA2,
  THR_COMP_NEAR_NEWGA2,
  THR_COMP_GLOBAL_GLOBALGA2,

  // Uni-directional pair (LAST, LAST2).
  THR_COMP_NEAR_NEARLL2,
  THR_COMP_NEW_NEWLL2,
  THR_COMP_NEW_NEARESTLL2,
  THR_COMP_NEAREST_NEWLL2,
  THR_COMP_NEW_NEARLL2,
  THR_COMP_NEAR_NEWLL2,
  THR_COMP_GLOBAL_GLOBALLL2,

  // Uni-directional pair (LAST, LAST3).
  THR_COMP_NEAR_NEARLL3,
  THR_COMP_NEW_NEWLL3,
  THR_COMP_NEW_NEARESTLL3,
  THR_COMP_NEAREST_NEWLL3,
  THR_COMP_NEW_NEARLL3,
  THR_COMP_NEAR_NEWLL3,
  THR_COMP_GLOBAL_GLOBALLL3,

  // Uni-directional pair (LAST, GOLDEN).
  THR_COMP_NEAR_NEARLG,
  THR_COMP_NEW_NEWLG,
  THR_COMP_NEW_NEARESTLG,
  THR_COMP_NEAREST_NEWLG,
  THR_COMP_NEW_NEARLG,
  THR_COMP_NEAR_NEWLG,
  THR_COMP_GLOBAL_GLOBALLG,

  // Uni-directional pair (BWDREF, ALTREF).
  THR_COMP_NEAR_NEARBA,
  THR_COMP_NEW_NEWBA,
  THR_COMP_NEW_NEARESTBA,
  THR_COMP_NEAREST_NEWBA,
  THR_COMP_NEW_NEARBA,
  THR_COMP_NEAR_NEWBA,
  THR_COMP_GLOBAL_GLOBALBA,

  // Intra modes, evaluated last.
  THR_DC,
  THR_PAETH,
  THR_SMOOTH,
  THR_SMOOTH_V,
  THR_SMOOTH_H,
  THR_H_PRED,
  THR_V_PRED,
  THR_D135_PRED,
  THR_D203_PRED,
  THR_D157_PRED,
  THR_D67_PRED,
  THR_D113_PRED,
  THR_D45_PRED,
};
299 
300 /*!\cond */
// Result of evaluating one single-reference inter mode against one
// reference frame.
typedef struct SingleInterModeState {
  int64_t rd;                    // RD cost of this mode / reference pairing.
  MV_REFERENCE_FRAME ref_frame;  // Reference frame the mode was tested with.
  int valid;                     // Non-zero once this entry holds a result.
} SingleInterModeState;
306 
// Aggregated state carried through the inter-mode RD search: the best
// candidate found so far plus per-mode / per-reference bookkeeping used for
// pruning later candidates.
typedef struct InterModeSearchState {
  int64_t best_rd;          // Best overall RD cost found so far.
  int64_t best_skip_rd[2];  // Best skip-path RD costs (two tracked variants).
  MB_MODE_INFO best_mbmode;  // Mode info of the current best candidate.
  int best_rate_y;           // Luma rate of the best candidate.
  int best_rate_uv;          // Chroma rate of the best candidate.
  int best_mode_skippable;   // Whether the best candidate is skippable.
  int best_skip2;
  THR_MODES best_mode_index;  // Index of the best mode in the mode order.
  int num_available_refs;
  int64_t dist_refs[REF_FRAMES];       // Per-reference distortion estimates.
  int dist_order_refs[REF_FRAMES];     // References ordered by distortion.
  int64_t mode_threshold[MAX_MODES];   // RD thresholds used to prune modes.
  int64_t best_intra_rd;               // Best RD among intra candidates.
  unsigned int best_pred_sse;

  /*!
   * \brief Keep track of best intra rd for use in compound mode.
   */
  int64_t best_pred_rd[REFERENCE_MODES];
  int64_t best_pred_diff[REFERENCE_MODES];
  // Save a set of single_newmv for each checked ref_mv.
  int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
  // Modelled RD per [mode][ref_mv_idx][reference frame].
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  // The rd of simple translation in single inter modes
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  int64_t best_single_rd[REF_FRAMES];       // Best single-ref RD per frame.
  PREDICTION_MODE best_single_mode[REF_FRAMES];  // And its prediction mode.

  // Single search results by [directions][modes][reference frames]
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
                                            [FWD_REFS];
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  IntraModeSearchState intra_search_state;  // State for intra mode search.
  RD_STATS best_y_rdcost;
} InterModeSearchState;
348 /*!\endcond */
349 
// Reset the per-block-size inter-mode RD models of a tile so that a fresh
// round of statistics collection can begin.
void av1_inter_mode_data_init(TileDataEnc *tile_data) {
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    InterModeRdModel *model = &tile_data->inter_mode_rd_models[bsize];
    model->ready = 0;
    model->num = 0;
    model->dist_sum = 0;
    model->ld_sum = 0;
    model->sse_sum = 0;
    model->sse_sse_sum = 0;
    model->sse_ld_sum = 0;
  }
}
362 
// Estimate residue rate and distortion for a block of size `bsize` from the
// tile's trained linear RD model. Returns 1 if the model for this block size
// is ready and an estimate was written, 0 otherwise.
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
                             int64_t sse, int *est_residue_cost,
                             int64_t *est_dist) {
  aom_clear_system_state();
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
  if (!md->ready) return 0;
  if (sse < md->dist_mean) {
    // Below the mean distortion the model predicts a free, lossless block.
    *est_residue_cost = 0;
    *est_dist = sse;
    return 1;
  }
  *est_dist = (int64_t)round(md->dist_mean);
  const double est_ld = md->a * sse + md->b;
  // Clamp estimated rate cost by INT_MAX / 2.
  // TODO(angiebird@google.com): find better solution than clamping.
  if (fabs(est_ld) < 1e-2) {
    *est_residue_cost = INT_MAX / 2;
  } else {
    const double cost_dbl = (sse - md->dist_mean) / est_ld;
    if (cost_dbl < 0) {
      *est_residue_cost = 0;
    } else {
      *est_residue_cost =
          (int)AOMMIN((int64_t)round(cost_dbl), INT_MAX / 2);
    }
  }
  // A non-positive rate estimate means the residue is effectively free.
  if (*est_residue_cost <= 0) {
    *est_residue_cost = 0;
    *est_dist = sse;
  }
  return 1;
}
397 
// Fit the per-block-size linear model ld = a * sse + b from the statistics
// accumulated by inter_mode_data_push(), then reset the accumulators.
// The first fit requires at least 200 samples; refits require 64 and are
// blended with the previous means (exponential moving average, weight 3:1
// toward the old value). `rdmult` is currently unused.
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
  aom_clear_system_state();
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    const int block_idx = inter_mode_data_block_idx(bsize);
    InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
    // Skip block sizes that do not participate in RD modelling.
    if (block_idx == -1) continue;
    if ((md->ready == 0 && md->num < 200) || (md->ready == 1 && md->num < 64)) {
      // Not enough new samples yet to (re)train the model.
      continue;
    } else {
      if (md->ready == 0) {
        // First training: means come directly from the accumulated sums.
        md->dist_mean = md->dist_sum / md->num;
        md->ld_mean = md->ld_sum / md->num;
        md->sse_mean = md->sse_sum / md->num;
        md->sse_sse_mean = md->sse_sse_sum / md->num;
        md->sse_ld_mean = md->sse_ld_sum / md->num;
      } else {
        // Refit: blend new means with the old ones (old weighted by factor).
        const double factor = 3;
        md->dist_mean =
            (md->dist_mean * factor + (md->dist_sum / md->num)) / (factor + 1);
        md->ld_mean =
            (md->ld_mean * factor + (md->ld_sum / md->num)) / (factor + 1);
        md->sse_mean =
            (md->sse_mean * factor + (md->sse_sum / md->num)) / (factor + 1);
        md->sse_sse_mean =
            (md->sse_sse_mean * factor + (md->sse_sse_sum / md->num)) /
            (factor + 1);
        md->sse_ld_mean =
            (md->sse_ld_mean * factor + (md->sse_ld_sum / md->num)) /
            (factor + 1);
      }

      // Least-squares slope/intercept for ld as a linear function of sse.
      const double my = md->ld_mean;
      const double mx = md->sse_mean;
      const double dx = sqrt(md->sse_sse_mean);
      const double dxy = md->sse_ld_mean;

      md->a = (dxy - mx * my) / (dx * dx - mx * mx);
      md->b = my - md->a * mx;
      md->ready = 1;

      // Reset accumulators for the next training window.
      md->num = 0;
      md->dist_sum = 0;
      md->ld_sum = 0;
      md->sse_sum = 0;
      md->sse_sse_sum = 0;
      md->sse_ld_sum = 0;
    }
    (void)rdmult;
  }
}
448 
inter_mode_data_push(TileDataEnc * tile_data,BLOCK_SIZE bsize,int64_t sse,int64_t dist,int residue_cost)449 static AOM_INLINE void inter_mode_data_push(TileDataEnc *tile_data,
450                                             BLOCK_SIZE bsize, int64_t sse,
451                                             int64_t dist, int residue_cost) {
452   if (residue_cost == 0 || sse == dist) return;
453   const int block_idx = inter_mode_data_block_idx(bsize);
454   if (block_idx == -1) return;
455   InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
456   if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
457     aom_clear_system_state();
458     const double ld = (sse - dist) * 1. / residue_cost;
459     ++rd_model->num;
460     rd_model->dist_sum += dist;
461     rd_model->ld_sum += ld;
462     rd_model->sse_sum += sse;
463     rd_model->sse_sse_sum += (double)sse * (double)sse;
464     rd_model->sse_ld_sum += sse * ld;
465   }
466 }
467 
inter_modes_info_push(InterModesInfo * inter_modes_info,int mode_rate,int64_t sse,int64_t rd,RD_STATS * rd_cost,RD_STATS * rd_cost_y,RD_STATS * rd_cost_uv,const MB_MODE_INFO * mbmi)468 static AOM_INLINE void inter_modes_info_push(InterModesInfo *inter_modes_info,
469                                              int mode_rate, int64_t sse,
470                                              int64_t rd, RD_STATS *rd_cost,
471                                              RD_STATS *rd_cost_y,
472                                              RD_STATS *rd_cost_uv,
473                                              const MB_MODE_INFO *mbmi) {
474   const int num = inter_modes_info->num;
475   assert(num < MAX_INTER_MODES);
476   inter_modes_info->mbmi_arr[num] = *mbmi;
477   inter_modes_info->mode_rate_arr[num] = mode_rate;
478   inter_modes_info->sse_arr[num] = sse;
479   inter_modes_info->est_rd_arr[num] = rd;
480   inter_modes_info->rd_cost_arr[num] = *rd_cost;
481   inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
482   inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
483   ++inter_modes_info->num;
484 }
485 
compare_rd_idx_pair(const void * a,const void * b)486 static int compare_rd_idx_pair(const void *a, const void *b) {
487   if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
488     return 0;
489   } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
490     return 1;
491   } else {
492     return -1;
493   }
494 }
495 
inter_modes_info_sort(const InterModesInfo * inter_modes_info,RdIdxPair * rd_idx_pair_arr)496 static AOM_INLINE void inter_modes_info_sort(
497     const InterModesInfo *inter_modes_info, RdIdxPair *rd_idx_pair_arr) {
498   if (inter_modes_info->num == 0) {
499     return;
500   }
501   for (int i = 0; i < inter_modes_info->num; ++i) {
502     rd_idx_pair_arr[i].idx = i;
503     rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
504   }
505   qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
506         compare_rd_idx_pair);
507 }
508 
509 // Similar to get_horver_correlation, but also takes into account first
510 // row/column, when computing horizontal/vertical correlation.
av1_get_horver_correlation_full_c(const int16_t * diff,int stride,int width,int height,float * hcorr,float * vcorr)511 void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
512                                        int width, int height, float *hcorr,
513                                        float *vcorr) {
514   // The following notation is used:
515   // x - current pixel
516   // y - left neighbor pixel
517   // z - top neighbor pixel
518   int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
519   int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
520   int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
521 
522   // First, process horizontal correlation on just the first row
523   x_sum += diff[0];
524   x2_sum += diff[0] * diff[0];
525   x_firstrow += diff[0];
526   x2_firstrow += diff[0] * diff[0];
527   for (int j = 1; j < width; ++j) {
528     const int16_t x = diff[j];
529     const int16_t y = diff[j - 1];
530     x_sum += x;
531     x_firstrow += x;
532     x2_sum += x * x;
533     x2_firstrow += x * x;
534     xy_sum += x * y;
535   }
536 
537   // Process vertical correlation in the first column
538   x_firstcol += diff[0];
539   x2_firstcol += diff[0] * diff[0];
540   for (int i = 1; i < height; ++i) {
541     const int16_t x = diff[i * stride];
542     const int16_t z = diff[(i - 1) * stride];
543     x_sum += x;
544     x_firstcol += x;
545     x2_sum += x * x;
546     x2_firstcol += x * x;
547     xz_sum += x * z;
548   }
549 
550   // Now process horiz and vert correlation through the rest unit
551   for (int i = 1; i < height; ++i) {
552     for (int j = 1; j < width; ++j) {
553       const int16_t x = diff[i * stride + j];
554       const int16_t y = diff[i * stride + j - 1];
555       const int16_t z = diff[(i - 1) * stride + j];
556       x_sum += x;
557       x2_sum += x * x;
558       xy_sum += x * y;
559       xz_sum += x * z;
560     }
561   }
562 
563   for (int j = 0; j < width; ++j) {
564     x_finalrow += diff[(height - 1) * stride + j];
565     x2_finalrow +=
566         diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
567   }
568   for (int i = 0; i < height; ++i) {
569     x_finalcol += diff[i * stride + width - 1];
570     x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
571   }
572 
573   int64_t xhor_sum = x_sum - x_finalcol;
574   int64_t xver_sum = x_sum - x_finalrow;
575   int64_t y_sum = x_sum - x_firstcol;
576   int64_t z_sum = x_sum - x_firstrow;
577   int64_t x2hor_sum = x2_sum - x2_finalcol;
578   int64_t x2ver_sum = x2_sum - x2_finalrow;
579   int64_t y2_sum = x2_sum - x2_firstcol;
580   int64_t z2_sum = x2_sum - x2_firstrow;
581 
582   const float num_hor = (float)(height * (width - 1));
583   const float num_ver = (float)((height - 1) * width);
584 
585   const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
586   const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
587 
588   const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
589   const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
590 
591   const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
592   const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
593 
594   if (xhor_var_n > 0 && y_var_n > 0) {
595     *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
596     *hcorr = *hcorr < 0 ? 0 : *hcorr;
597   } else {
598     *hcorr = 1.0;
599   }
600   if (xver_var_n > 0 && z_var_n > 0) {
601     *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
602     *vcorr = *vcorr < 0 ? 0 : *vcorr;
603   } else {
604     *vcorr = 1.0;
605   }
606 }
607 
get_sse(const AV1_COMP * cpi,const MACROBLOCK * x,int64_t * sse_y)608 static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
609                        int64_t *sse_y) {
610   const AV1_COMMON *cm = &cpi->common;
611   const int num_planes = av1_num_planes(cm);
612   const MACROBLOCKD *xd = &x->e_mbd;
613   const MB_MODE_INFO *mbmi = xd->mi[0];
614   int64_t total_sse = 0;
615   for (int plane = 0; plane < num_planes; ++plane) {
616     if (plane && !xd->is_chroma_ref) break;
617     const struct macroblock_plane *const p = &x->plane[plane];
618     const struct macroblockd_plane *const pd = &xd->plane[plane];
619     const BLOCK_SIZE bs =
620         get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
621     unsigned int sse;
622 
623     cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
624                        &sse);
625     total_sse += sse;
626     if (!plane && sse_y) *sse_y = sse;
627   }
628   total_sse <<= 4;
629   return total_sse;
630 }
631 
// Compute the sum of squared differences between original and dequantized
// transform coefficients, and (via *ssz) the sum of squared original
// coefficients.
// Fix: the products were previously computed in `int` (tran_low_t is a
// 32-bit type), so large coefficients could trigger signed-integer-overflow
// UB; both products are now accumulated in int64_t, matching the pattern
// already used by av1_highbd_block_error_c().
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t error = 0, sqcoeff = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = (int64_t)coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}
646 
// Low-precision block error: sum of squared differences between original and
// dequantized 16-bit coefficients (no sum-of-squares side output).
int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             intptr_t block_size) {
  int64_t total = 0;
  for (intptr_t i = 0; i < block_size; i++) {
    const int delta = coeff[i] - dqcoeff[i];
    total += delta * delta;
  }
  return total;
}
658 
659 #if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth block error: like av1_block_error_c but normalizes the
// accumulated error and sum-of-squares back to an 8-bit scale (with
// rounding) according to the bit depth `bd`.
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
  int64_t error = 0, sqcoeff = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);

  *ssz = (sqcoeff + rounding) >> shift;
  return (error + rounding) >> shift;
}
680 #endif
681 
// Returns 1 if the diagonal intra mode `mode` can be skipped because the
// best intra mode found so far is not one of its neighboring directions,
// 0 otherwise.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D113_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D67_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D203_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D157_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default: return 0;
  }
}
698 
// Rate cost of signalling the inter prediction mode `mode` under the given
// mode context. Compound modes use a dedicated table; single-reference modes
// accumulate costs down the NEWMV / GLOBALMV / NEAREST-vs-NEAR signalling
// tree.
static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
                       int16_t mode_context) {
  if (is_inter_compound_mode(mode)) {
    return mode_costs
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
  }

  assert(is_inter_mode(mode));

  // Level 0: NEWMV vs. any other mode.
  int16_t ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) return mode_costs->newmv_mode_cost[ctx][0];
  int cost = mode_costs->newmv_mode_cost[ctx][1];

  // Level 1: GLOBALMV vs. the NEAREST/NEAR family.
  ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  if (mode == GLOBALMV) return cost + mode_costs->zeromv_mode_cost[ctx][0];
  cost += mode_costs->zeromv_mode_cost[ctx][1];

  // Level 2: NEARESTMV vs. NEARMV.
  ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
  cost += mode_costs->refmv_mode_cost[ctx][mode != NEARESTMV];
  return cost;
}
729 
get_single_mode(PREDICTION_MODE this_mode,int ref_idx)730 static INLINE PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
731                                               int ref_idx) {
732   return ref_idx ? compound_ref1_mode(this_mode)
733                  : compound_ref0_mode(this_mode);
734 }
735 
estimate_ref_frame_costs(const AV1_COMMON * cm,const MACROBLOCKD * xd,const ModeCosts * mode_costs,int segment_id,unsigned int * ref_costs_single,unsigned int (* ref_costs_comp)[REF_FRAMES])736 static AOM_INLINE void estimate_ref_frame_costs(
737     const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
738     int segment_id, unsigned int *ref_costs_single,
739     unsigned int (*ref_costs_comp)[REF_FRAMES]) {
740   int seg_ref_active =
741       segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
742   if (seg_ref_active) {
743     memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
744     int ref_frame;
745     for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
746       memset(ref_costs_comp[ref_frame], 0,
747              REF_FRAMES * sizeof((*ref_costs_comp)[0]));
748   } else {
749     int intra_inter_ctx = av1_get_intra_inter_context(xd);
750     ref_costs_single[INTRA_FRAME] =
751         mode_costs->intra_inter_cost[intra_inter_ctx][0];
752     unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];
753 
754     for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
755       ref_costs_single[i] = base_cost;
756 
757     const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
758     const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
759     const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
760     const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
761     const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
762     const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);
763 
764     // Determine cost of a single ref frame, where frame types are represented
765     // by a tree:
766     // Level 0: add cost whether this ref is a forward or backward ref
767     ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
768     ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
769     ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
770     ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
771     ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
772     ref_costs_single[ALTREF2_FRAME] +=
773         mode_costs->single_ref_cost[ctx_p1][0][1];
774     ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
775 
776     // Level 1: if this ref is forward ref,
777     // add cost whether it is last/last2 or last3/golden
778     ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
779     ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
780     ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
781     ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
782 
783     // Level 1: if this ref is backward ref
784     // then add cost whether this ref is altref or backward ref
785     ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
786     ref_costs_single[ALTREF2_FRAME] +=
787         mode_costs->single_ref_cost[ctx_p2][1][0];
788     ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];
789 
790     // Level 2: further add cost whether this ref is last or last2
791     ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
792     ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];
793 
794     // Level 2: last3 or golden
795     ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
796     ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];
797 
798     // Level 2: bwdref or altref2
799     ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
800     ref_costs_single[ALTREF2_FRAME] +=
801         mode_costs->single_ref_cost[ctx_p6][5][1];
802 
803     if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
804       // Similar to single ref, determine cost of compound ref frames.
805       // cost_compound_refs = cost_first_ref + cost_second_ref
806       const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
807       const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
808       const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
809       const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
810       const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);
811 
812       const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
813       unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };
814 
815       ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
816           ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
817               base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
818       ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
819       ref_bicomp_costs[ALTREF_FRAME] = 0;
820 
821       // cost of first ref frame
822       ref_bicomp_costs[LAST_FRAME] +=
823           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
824       ref_bicomp_costs[LAST2_FRAME] +=
825           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
826       ref_bicomp_costs[LAST3_FRAME] +=
827           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
828       ref_bicomp_costs[GOLDEN_FRAME] +=
829           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
830 
831       ref_bicomp_costs[LAST_FRAME] +=
832           mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
833       ref_bicomp_costs[LAST2_FRAME] +=
834           mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];
835 
836       ref_bicomp_costs[LAST3_FRAME] +=
837           mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
838       ref_bicomp_costs[GOLDEN_FRAME] +=
839           mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];
840 
841       // cost of second ref frame
842       ref_bicomp_costs[BWDREF_FRAME] +=
843           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
844       ref_bicomp_costs[ALTREF2_FRAME] +=
845           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
846       ref_bicomp_costs[ALTREF_FRAME] +=
847           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];
848 
849       ref_bicomp_costs[BWDREF_FRAME] +=
850           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
851       ref_bicomp_costs[ALTREF2_FRAME] +=
852           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];
853 
854       // cost: if one ref frame is forward ref, the other ref is backward ref
855       int ref0, ref1;
856       for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
857         for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
858           ref_costs_comp[ref0][ref1] =
859               ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
860         }
861       }
862 
863       // cost: if both ref frames are the same side.
864       const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
865       const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
866       const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
867       ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
868           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
869           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
870           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
871       ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
872           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
873           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
874           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
875           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
876       ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
877           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
878           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
879           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
880           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
881       ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
882           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
883           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
884     } else {
885       int ref0, ref1;
886       for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
887         for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
888           ref_costs_comp[ref0][ref1] = 512;
889       }
890       ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
891       ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
892       ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
893       ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
894     }
895   }
896 }
897 
store_coding_context(MACROBLOCK * x,PICK_MODE_CONTEXT * ctx,int mode_index,int64_t comp_pred_diff[REFERENCE_MODES],int skippable)898 static AOM_INLINE void store_coding_context(
899 #if CONFIG_INTERNAL_STATS
900     MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
901 #else
902     MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
903 #endif  // CONFIG_INTERNAL_STATS
904     int64_t comp_pred_diff[REFERENCE_MODES], int skippable) {
905   MACROBLOCKD *const xd = &x->e_mbd;
906 
907   // Take a snapshot of the coding context so it can be
908   // restored if we decide to encode this way
909   ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
910   ctx->skippable = skippable;
911 #if CONFIG_INTERNAL_STATS
912   ctx->best_mode_index = mode_index;
913 #endif  // CONFIG_INTERNAL_STATS
914   ctx->mic = *xd->mi[0];
915   av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
916                                       av1_ref_frame_type(xd->mi[0]->ref_frame));
917   ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
918   ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
919   ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
920 }
921 
setup_buffer_ref_mvs_inter(const AV1_COMP * const cpi,MACROBLOCK * x,MV_REFERENCE_FRAME ref_frame,BLOCK_SIZE block_size,struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE])922 static AOM_INLINE void setup_buffer_ref_mvs_inter(
923     const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
924     BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
925   const AV1_COMMON *cm = &cpi->common;
926   const int num_planes = av1_num_planes(cm);
927   const YV12_BUFFER_CONFIG *scaled_ref_frame =
928       av1_get_scaled_ref_frame(cpi, ref_frame);
929   MACROBLOCKD *const xd = &x->e_mbd;
930   MB_MODE_INFO *const mbmi = xd->mi[0];
931   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
932   const struct scale_factors *const sf =
933       get_ref_scale_factors_const(cm, ref_frame);
934   const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
935   assert(yv12 != NULL);
936 
937   if (scaled_ref_frame) {
938     // Setup pred block based on scaled reference, because av1_mv_pred() doesn't
939     // support scaling.
940     av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
941                          num_planes);
942   } else {
943     av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
944   }
945 
946   // Gets an initial list of candidate vectors from neighbours and orders them
947   av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
948                    xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
949                    mbmi_ext->mode_context);
950   // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
951   // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
952   av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
953   // Further refinement that is encode side only to test the top few candidates
954   // in full and choose the best as the center point for subsequent searches.
955   // The current implementation doesn't support scaling.
956   av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
957               ref_frame, block_size);
958 
959   // Go back to unscaled reference.
960   if (scaled_ref_frame) {
961     // We had temporarily setup pred block based on scaled reference above. Go
962     // back to unscaled reference now, for subsequent use.
963     av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
964   }
965 }
966 
967 #define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
968 #define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
969 
970 // TODO(jingning): this mv clamping function should be block size dependent.
clamp_mv2(MV * mv,const MACROBLOCKD * xd)971 static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
972   const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
973                                      xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
974                                      xd->mb_to_top_edge - LEFT_TOP_MARGIN,
975                                      xd->mb_to_bottom_edge +
976                                          RIGHT_BOTTOM_MARGIN };
977   clamp_mv(mv, &mv_limits);
978 }
979 
980 /* If the current mode shares the same mv with other modes with higher cost,
981  * skip this mode. */
skip_repeated_mv(const AV1_COMMON * const cm,const MACROBLOCK * const x,PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME ref_frames[2],InterModeSearchState * search_state)982 static int skip_repeated_mv(const AV1_COMMON *const cm,
983                             const MACROBLOCK *const x,
984                             PREDICTION_MODE this_mode,
985                             const MV_REFERENCE_FRAME ref_frames[2],
986                             InterModeSearchState *search_state) {
987   const int is_comp_pred = ref_frames[1] > INTRA_FRAME;
988   const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
989   const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
990   const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
991   PREDICTION_MODE compare_mode = MB_MODE_COUNT;
992   if (!is_comp_pred) {
993     if (this_mode == NEARMV) {
994       if (ref_mv_count == 0) {
995         // NEARMV has the same motion vector as NEARESTMV
996         compare_mode = NEARESTMV;
997       }
998       if (ref_mv_count == 1 &&
999           cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1000         // NEARMV has the same motion vector as GLOBALMV
1001         compare_mode = GLOBALMV;
1002       }
1003     }
1004     if (this_mode == GLOBALMV) {
1005       if (ref_mv_count == 0 &&
1006           cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) {
1007         // GLOBALMV has the same motion vector as NEARESTMV
1008         compare_mode = NEARESTMV;
1009       }
1010       if (ref_mv_count == 1) {
1011         // GLOBALMV has the same motion vector as NEARMV
1012         compare_mode = NEARMV;
1013       }
1014     }
1015 
1016     if (compare_mode != MB_MODE_COUNT) {
1017       // Use modelled_rd to check whether compare mode was searched
1018       if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] !=
1019           INT64_MAX) {
1020         const int16_t mode_ctx =
1021             av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
1022         const int compare_cost =
1023             cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
1024         const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);
1025 
1026         // Only skip if the mode cost is larger than compare mode cost
1027         if (this_cost > compare_cost) {
1028           search_state->modelled_rd[this_mode][0][ref_frames[0]] =
1029               search_state->modelled_rd[compare_mode][0][ref_frames[0]];
1030           return 1;
1031         }
1032       }
1033     }
1034   }
1035   return 0;
1036 }
1037 
clamp_and_check_mv(int_mv * out_mv,int_mv in_mv,const AV1_COMMON * cm,const MACROBLOCK * x)1038 static INLINE int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
1039                                      const AV1_COMMON *cm,
1040                                      const MACROBLOCK *x) {
1041   const MACROBLOCKD *const xd = &x->e_mbd;
1042   *out_mv = in_mv;
1043   lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
1044                      cm->features.cur_frame_force_integer_mv);
1045   clamp_mv2(&out_mv->as_mv, xd);
1046   return av1_is_fullmv_in_range(&x->mv_limits,
1047                                 get_fullmv_from_mv(&out_mv->as_mv));
1048 }
1049 
1050 // To use single newmv directly for compound modes, need to clamp the mv to the
1051 // valid mv range. Without this, encoder would generate out of range mv, and
1052 // this is seen in 8k encoding.
clamp_mv_in_range(MACROBLOCK * const x,int_mv * mv,int ref_idx)1053 static INLINE void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
1054                                      int ref_idx) {
1055   const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
1056   SubpelMvLimits mv_limits;
1057 
1058   av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
1059   clamp_mv(&mv->as_mv, &mv_limits);
1060 }
1061 
/*!\brief Determine the new motion vector(s) for a NEWMV-class inter mode.
 *
 * For compound modes, previously found single-reference NEWMV results are
 * reused (and clamped into range) where valid, and the MV signaling rate is
 * computed. For single-reference NEWMV, a full motion search is run
 * (optionally with a reduced range, under the reduce_search_range speed
 * feature) and the result is cached in args->single_newmv* for later reuse.
 *
 * \return 0 on success, or INT64_MAX if the single-reference motion search
 * failed to find a valid MV (the caller should skip this mode).
 */
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
                            int *const rate_mv, HandleInterModeArgs *const args,
                            inter_mode_info *mode_info) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  const int ref_mv_idx = mbmi->ref_mv_idx;

  if (is_comp_pred) {
    // Reuse cached single-reference NEWMV results for each side when valid.
    const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
    const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
    if (this_mode == NEW_NEWMV) {
      // Both MVs are NEW: take each side from the single-ref cache (if
      // valid) and sum the signaling cost of both MVs.
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      *rate_mv = 0;
      for (int i = 0; i < 2; ++i) {
        const int_mv ref_mv = av1_get_ref_mv(x, i);
        *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
                                    x->mv_costs->nmv_joint_cost,
                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
      }
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      // Only the second (backward) MV is NEW; only its rate is counted.
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 1);
      *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    } else {
      // Only the first (forward) MV is NEW.
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 0);
      *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    }
  } else {
    // Single ref case.
    const int ref_idx = 0;
    int search_range = INT_MAX;

    // Speed feature: for a non-zero ref_mv_idx, derive a reduced full-pel
    // search range from the closest previously searched ref MV candidate.
    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
      int min_mv_diff = INT_MAX;
      int best_match = -1;
      MV prev_ref_mv[2] = { { 0 } };
      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
                                                     idx, &x->mbmi_ext)
                               .as_mv;
        // Chebyshev distance between the current and previous ref MVs.
        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
                                       abs(ref_mv.col - prev_ref_mv[idx].col));

        if (min_mv_diff > ref_mv_diff) {
          min_mv_diff = ref_mv_diff;
          best_match = idx;
        }
      }

      // Only shrink the range when the closest candidate is within 16 pels
      // (MV units are 1/8 pel, hence the << 3).
      if (min_mv_diff < (16 << 3)) {
        if (args->single_newmv_valid[best_match][refs[0]]) {
          search_range = min_mv_diff;
          // Widen by the distance from the cached best MV to its ref MV.
          search_range +=
              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
                         prev_ref_mv[best_match].row),
                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
                         prev_ref_mv[best_match].col));
          // Get full pixel search range.
          search_range = (search_range + 4) >> 3;
        }
      }
    }

    int_mv best_mv;
    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
                             mode_info, &best_mv);
    if (best_mv.as_int == INVALID_MV) return INT64_MAX;

    // Cache the result so compound modes can reuse it later.
    args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
    cur_mv[0].as_int = best_mv.as_int;
  }

  return 0;
}
1163 
update_mode_start_end_index(const AV1_COMP * const cpi,int * mode_index_start,int * mode_index_end,int last_motion_mode_allowed,int interintra_allowed,int eval_motion_mode)1164 static INLINE void update_mode_start_end_index(const AV1_COMP *const cpi,
1165                                                int *mode_index_start,
1166                                                int *mode_index_end,
1167                                                int last_motion_mode_allowed,
1168                                                int interintra_allowed,
1169                                                int eval_motion_mode) {
1170   *mode_index_start = (int)SIMPLE_TRANSLATION;
1171   *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
1172   if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
1173     if (!eval_motion_mode) {
1174       *mode_index_end = (int)SIMPLE_TRANSLATION;
1175     } else {
1176       // Set the start index appropriately to process motion modes other than
1177       // simple translation
1178       *mode_index_start = 1;
1179     }
1180   }
1181 }
1182 
1183 /*!\brief AV1 motion mode search
1184  *
1185  * \ingroup inter_mode_search
1186  * Function to search over and determine the motion mode. It will update
1187  * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
1188  * WARPED_CAUSAL and determine any necessary side information for the selected
1189  * motion mode. It will also perform the full transform search, unless the
1190  * input parameter do_tx_search indicates to do an estimation of the RD rather
1191  * than an RD corresponding to a full transform search. It will return the
1192  * RD for the final motion_mode.
1193  * Do the RD search for a given inter mode and compute all information relevant
1194  * to the input mode. It will compute the best MV,
1195  * compound parameters (if the mode is a compound mode) and interpolation filter
1196  * parameters.
1197  *
1198  * \param[in]     cpi               Top-level encoder structure.
1199  * \param[in]     tile_data         Pointer to struct holding adaptive
1200  *                                  data/contexts/models for the tile during
1201  *                                  encoding.
1202  * \param[in]     x                 Pointer to struct holding all the data for
1203  *                                  the current macroblock.
1204  * \param[in]     bsize             Current block size.
1205  * \param[in,out] rd_stats          Struct to keep track of the overall RD
1206  *                                  information.
1207  * \param[in,out] rd_stats_y        Struct to keep track of the RD information
1208  *                                  for only the Y plane.
1209  * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
1210  *                                  for only the UV planes.
1211  * \param[in]     args              HandleInterModeArgs struct holding
1212  *                                  miscellaneous arguments for inter mode
1213  *                                  search. See the documentation for this
1214  *                                  struct for a description of each member.
1215  * \param[in]     ref_best_rd       Best RD found so far for this block.
1216  *                                  It is used for early termination of this
1217  *                                  search if the RD exceeds this value.
1218  * \param[in,out] ref_skip_rd       A length 2 array, where skip_rd[0] is the
1219  *                                  best total RD for a skip mode so far, and
1220  *                                  skip_rd[1] is the best RD for a skip mode so
1221  *                                  far in luma. This is used as a speed feature
1222  *                                  to skip the transform search if the computed
1223  *                                  skip RD for the current mode is not better
1224  *                                  than the best skip_rd so far.
1225  * \param[in,out] rate_mv           The rate associated with the motion vectors.
1226  *                                  This will be modified if a motion search is
1227  *                                  done in the motion mode search.
1228  * \param[in,out] orig_dst          A prediction buffer to hold a computed
1229  *                                  prediction. This will eventually hold the
1230  *                                  final prediction, and the tmp_dst info will
1231  *                                  be copied here.
1232  * \param[in,out] best_est_rd       Estimated RD for motion mode search if
1233  *                                  do_tx_search (see below) is 0.
1234  * \param[in]     do_tx_search      Parameter to indicate whether or not to do
1235  *                                  a full transform search. This will compute
1236  *                                  an estimated RD for the modes without the
1237  *                                  transform search and later perform the full
1238  *                                  transform search on the best candidates.
1239  * \param[in]     inter_modes_info  InterModesInfo struct to hold inter mode
1240  *                                  information to perform a full transform
1241  *                                  search only on winning candidates searched
1242  *                                  with an estimate for transform coding RD.
 * \param[in]     eval_motion_mode  Boolean whether or not to evaluate
 *                                  motion modes other than SIMPLE_TRANSLATION.
1245  * \param[out]    yrd               Stores the rdcost corresponding to encoding
1246  *                                  the luma plane.
1247  * \return Returns INT64_MAX if the determined motion mode is invalid and the
1248  * current motion mode being tested should be skipped. It returns 0 if the
1249  * motion mode search is a success.
1250  */
motion_mode_rd(const AV1_COMP * const cpi,TileDataEnc * tile_data,MACROBLOCK * const x,BLOCK_SIZE bsize,RD_STATS * rd_stats,RD_STATS * rd_stats_y,RD_STATS * rd_stats_uv,HandleInterModeArgs * const args,int64_t ref_best_rd,int64_t * ref_skip_rd,int * rate_mv,const BUFFER_SET * orig_dst,int64_t * best_est_rd,int do_tx_search,InterModesInfo * inter_modes_info,int eval_motion_mode,int64_t * yrd)1251 static int64_t motion_mode_rd(
1252     const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
1253     BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
1254     RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
1255     int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
1256     int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
1257     int eval_motion_mode, int64_t *yrd) {
1258   const AV1_COMMON *const cm = &cpi->common;
1259   const FeatureFlags *const features = &cm->features;
1260   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
1261   const int num_planes = av1_num_planes(cm);
1262   MACROBLOCKD *xd = &x->e_mbd;
1263   MB_MODE_INFO *mbmi = xd->mi[0];
1264   const int is_comp_pred = has_second_ref(mbmi);
1265   const PREDICTION_MODE this_mode = mbmi->mode;
1266   const int rate2_nocoeff = rd_stats->rate;
1267   int best_xskip_txfm = 0;
1268   RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
1269   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
1270   uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
1271   const int rate_mv0 = *rate_mv;
1272   const int interintra_allowed = cm->seq_params.enable_interintra_compound &&
1273                                  is_interintra_allowed(mbmi) &&
1274                                  mbmi->compound_idx;
1275   WARP_SAMPLE_INFO *const warp_sample_info =
1276       &x->warp_sample_info[mbmi->ref_frame[0]];
1277   int *pts0 = warp_sample_info->pts;
1278   int *pts_inref0 = warp_sample_info->pts_inref;
1279 
1280   assert(mbmi->ref_frame[1] != INTRA_FRAME);
1281   const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
1282   av1_invalid_rd_stats(&best_rd_stats);
1283   aom_clear_system_state();
1284   mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
1285   MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
1286   *yrd = INT64_MAX;
1287   if (features->switchable_motion_mode) {
1288     // Determine which motion modes to search if more than SIMPLE_TRANSLATION
1289     // is allowed.
1290     last_motion_mode_allowed = motion_mode_allowed(
1291         xd->global_motion, xd, mbmi, features->allow_warped_motion);
1292   }
1293 
1294   if (last_motion_mode_allowed == WARPED_CAUSAL) {
1295     // Collect projection samples used in least squares approximation of
1296     // the warped motion parameters if WARPED_CAUSAL is going to be searched.
1297     if (warp_sample_info->num < 0) {
1298       warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
1299     }
1300     mbmi->num_proj_ref = warp_sample_info->num;
1301   }
1302   const int total_samples = mbmi->num_proj_ref;
1303   if (total_samples == 0) {
1304     // Do not search WARPED_CAUSAL if there are no samples to use to determine
1305     // warped parameters.
1306     last_motion_mode_allowed = OBMC_CAUSAL;
1307   }
1308 
1309   const MB_MODE_INFO base_mbmi = *mbmi;
1310   MB_MODE_INFO best_mbmi;
1311   const int interp_filter = features->interp_filter;
1312   const int switchable_rate =
1313       av1_is_interp_needed(xd)
1314           ? av1_get_switchable_rate(x, xd, interp_filter,
1315                                     cm->seq_params.enable_dual_filter)
1316           : 0;
1317   int64_t best_rd = INT64_MAX;
1318   int best_rate_mv = rate_mv0;
1319   const int mi_row = xd->mi_row;
1320   const int mi_col = xd->mi_col;
1321   int mode_index_start, mode_index_end;
1322   // Modify the start and end index according to speed features. For example,
1323   // if SIMPLE_TRANSLATION has already been searched according to
1324   // the motion_mode_for_winner_cand speed feature, update the mode_index_start
1325   // to avoid searching it again.
1326   update_mode_start_end_index(cpi, &mode_index_start, &mode_index_end,
1327                               last_motion_mode_allowed, interintra_allowed,
1328                               eval_motion_mode);
1329   // Main function loop. This loops over all of the possible motion modes and
1330   // computes RD to determine the best one. This process includes computing
1331   // any necessary side information for the motion mode and performing the
1332   // transform search.
1333   for (int mode_index = mode_index_start; mode_index <= mode_index_end;
1334        mode_index++) {
1335     if (args->skip_motion_mode && mode_index) continue;
1336     int tmp_rate2 = rate2_nocoeff;
1337     const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
1338     int tmp_rate_mv = rate_mv0;
1339 
1340     *mbmi = base_mbmi;
1341     if (is_interintra_mode) {
1342       // Only use SIMPLE_TRANSLATION for interintra
1343       mbmi->motion_mode = SIMPLE_TRANSLATION;
1344     } else {
1345       mbmi->motion_mode = (MOTION_MODE)mode_index;
1346       assert(mbmi->ref_frame[1] != INTRA_FRAME);
1347     }
1348 
1349     // Do not search OBMC if the probability of selecting it is below a
1350     // predetermined threshold for this update_type and block size.
1351     const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
1352     const int prune_obmc = cpi->frame_probs.obmc_probs[update_type][bsize] <
1353                            cpi->sf.inter_sf.prune_obmc_prob_thresh;
1354     if ((!cpi->oxcf.motion_mode_cfg.enable_obmc ||
1355          cpi->sf.rt_sf.use_nonrd_pick_mode || prune_obmc) &&
1356         mbmi->motion_mode == OBMC_CAUSAL)
1357       continue;
1358 
1359     if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
1360       // SIMPLE_TRANSLATION mode: no need to recalculate.
1361       // The prediction is calculated before motion_mode_rd() is called in
1362       // handle_inter_mode()
1363     } else if (mbmi->motion_mode == OBMC_CAUSAL) {
1364       const uint32_t cur_mv = mbmi->mv[0].as_int;
1365       // OBMC_CAUSAL not allowed for compound prediction
1366       assert(!is_comp_pred);
1367       if (have_newmv_in_inter_mode(this_mode)) {
1368         av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
1369                                  &mbmi->mv[0]);
1370         tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
1371       }
1372       if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
1373         // Build the predictor according to the current motion vector if it has
1374         // not already been built
1375         av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
1376                                       0, av1_num_planes(cm) - 1);
1377       }
1378       // Build the inter predictor by blending the predictor corresponding to
1379       // this MV, and the neighboring blocks using the OBMC model
1380       av1_build_obmc_inter_prediction(
1381           cm, xd, args->above_pred_buf, args->above_pred_stride,
1382           args->left_pred_buf, args->left_pred_stride);
1383 #if !CONFIG_REALTIME_ONLY
1384     } else if (mbmi->motion_mode == WARPED_CAUSAL) {
1385       int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
1386       mbmi->motion_mode = WARPED_CAUSAL;
1387       mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
1388       mbmi->interp_filters =
1389           av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
1390 
1391       memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
1392       memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
1393       // Select the samples according to motion vector difference
1394       if (mbmi->num_proj_ref > 1) {
1395         mbmi->num_proj_ref = av1_selectSamples(
1396             &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
1397       }
1398 
1399       // Compute the warped motion parameters with a least squares fit
1400       //  using the collected samples
1401       if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
1402                                mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
1403                                &mbmi->wm_params, mi_row, mi_col)) {
1404         assert(!is_comp_pred);
1405         if (have_newmv_in_inter_mode(this_mode)) {
1406           // Refine MV for NEWMV mode
1407           const int_mv mv0 = mbmi->mv[0];
1408           const WarpedMotionParams wm_params0 = mbmi->wm_params;
1409           const int num_proj_ref0 = mbmi->num_proj_ref;
1410 
1411           const int_mv ref_mv = av1_get_ref_mv(x, 0);
1412           SUBPEL_MOTION_SEARCH_PARAMS ms_params;
1413           av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
1414                                             &ref_mv.as_mv, NULL);
1415 
1416           // Refine MV in a small range.
1417           av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
1418                                total_samples);
1419 
1420           if (mv0.as_int != mbmi->mv[0].as_int) {
1421             // Keep the refined MV and WM parameters.
1422             tmp_rate_mv = av1_mv_bit_cost(
1423                 &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
1424                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1425             tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
1426           } else {
1427             // Restore the old MV and WM parameters.
1428             mbmi->mv[0] = mv0;
1429             mbmi->wm_params = wm_params0;
1430             mbmi->num_proj_ref = num_proj_ref0;
1431           }
1432         }
1433 
1434         // Build the warped predictor
1435         av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
1436                                       av1_num_planes(cm) - 1);
1437       } else {
1438         continue;
1439       }
1440 #endif  // !CONFIG_REALTIME_ONLY
1441     } else if (is_interintra_mode) {
1442       const int ret =
1443           av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
1444                                       &tmp_rate_mv, &tmp_rate2, orig_dst);
1445       if (ret < 0) continue;
1446     }
1447 
1448     // If we are searching newmv and the mv is the same as refmv, skip the
1449     // current mode
1450     if (!av1_check_newmv_joint_nonzero(cm, x)) continue;
1451 
1452     // Update rd_stats for the current motion mode
1453     txfm_info->skip_txfm = 0;
1454     rd_stats->dist = 0;
1455     rd_stats->sse = 0;
1456     rd_stats->skip_txfm = 1;
1457     rd_stats->rate = tmp_rate2;
1458     const ModeCosts *mode_costs = &x->mode_costs;
1459     if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
1460     if (interintra_allowed) {
1461       rd_stats->rate +=
1462           mode_costs->interintra_cost[size_group_lookup[bsize]]
1463                                      [mbmi->ref_frame[1] == INTRA_FRAME];
1464     }
1465     if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
1466         (mbmi->ref_frame[1] != INTRA_FRAME)) {
1467       if (last_motion_mode_allowed == WARPED_CAUSAL) {
1468         rd_stats->rate +=
1469             mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
1470       } else {
1471         rd_stats->rate +=
1472             mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
1473       }
1474     }
1475 
1476     int64_t this_yrd = INT64_MAX;
1477 
1478     if (!do_tx_search) {
1479       // Avoid doing a transform search here to speed up the overall mode
1480       // search. It will be done later in the mode search if the current
1481       // motion mode seems promising.
1482       int64_t curr_sse = -1;
1483       int64_t sse_y = -1;
1484       int est_residue_cost = 0;
1485       int64_t est_dist = 0;
1486       int64_t est_rd = 0;
1487       if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
1488         curr_sse = get_sse(cpi, x, &sse_y);
1489         const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
1490                                                  &est_residue_cost, &est_dist);
1491         (void)has_est_rd;
1492         assert(has_est_rd);
1493       } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
1494                  cpi->sf.rt_sf.use_nonrd_pick_mode) {
1495         model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
1496             cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
1497             NULL, &curr_sse, NULL, NULL, NULL);
1498         sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
1499       }
1500       est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
1501       if (est_rd * 0.80 > *best_est_rd) {
1502         mbmi->ref_frame[1] = ref_frame_1;
1503         continue;
1504       }
1505       const int mode_rate = rd_stats->rate;
1506       rd_stats->rate += est_residue_cost;
1507       rd_stats->dist = est_dist;
1508       rd_stats->rdcost = est_rd;
1509       if (rd_stats->rdcost < *best_est_rd) {
1510         *best_est_rd = rd_stats->rdcost;
1511         assert(sse_y >= 0);
1512         ref_skip_rd[1] = cpi->sf.inter_sf.txfm_rd_gate_level
1513                              ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
1514                              : INT64_MAX;
1515       }
1516       if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
1517         if (!is_comp_pred) {
1518           assert(curr_sse >= 0);
1519           inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
1520                                 rd_stats->rdcost, rd_stats, rd_stats_y,
1521                                 rd_stats_uv, mbmi);
1522         }
1523       } else {
1524         assert(curr_sse >= 0);
1525         inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
1526                               rd_stats->rdcost, rd_stats, rd_stats_y,
1527                               rd_stats_uv, mbmi);
1528       }
1529       mbmi->skip_txfm = 0;
1530     } else {
1531       // Perform full transform search
1532       int64_t skip_rd = INT64_MAX;
1533       int64_t skip_rdy = INT64_MAX;
1534       if (cpi->sf.inter_sf.txfm_rd_gate_level) {
1535         // Check if the mode is good enough based on skip RD
1536         int64_t sse_y = INT64_MAX;
1537         int64_t curr_sse = get_sse(cpi, x, &sse_y);
1538         skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
1539         skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
1540         int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
1541                                         cpi->sf.inter_sf.txfm_rd_gate_level, 0);
1542         if (!eval_txfm) continue;
1543       }
1544 
1545       // Do transform search
1546       const int mode_rate = rd_stats->rate;
1547       if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
1548                            rd_stats->rate, ref_best_rd)) {
1549         if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
1550           return INT64_MAX;
1551         }
1552         continue;
1553       }
1554       const int skip_ctx = av1_get_skip_txfm_context(xd);
1555       const int y_rate =
1556           rd_stats->skip_txfm
1557               ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
1558               : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
1559       this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);
1560 
1561       const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
1562       if (curr_rd < ref_best_rd) {
1563         ref_best_rd = curr_rd;
1564         ref_skip_rd[0] = skip_rd;
1565         ref_skip_rd[1] = skip_rdy;
1566       }
1567       if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
1568         inter_mode_data_push(
1569             tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
1570             rd_stats_y->rate + rd_stats_uv->rate +
1571                 mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
1572       }
1573     }
1574 
1575     if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
1576       if (is_nontrans_global_motion(xd, xd->mi[0])) {
1577         mbmi->interp_filters =
1578             av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
1579       }
1580     }
1581 
1582     const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
1583     if (mode_index == 0) {
1584       args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
1585     }
1586     if (mode_index == 0 || tmp_rd < best_rd) {
1587       // Update best_rd data if this is the best motion mode so far
1588       best_mbmi = *mbmi;
1589       best_rd = tmp_rd;
1590       best_rd_stats = *rd_stats;
1591       best_rd_stats_y = *rd_stats_y;
1592       best_rate_mv = tmp_rate_mv;
1593       *yrd = this_yrd;
1594       if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
1595       memcpy(best_blk_skip, txfm_info->blk_skip,
1596              sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
1597       av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
1598       best_xskip_txfm = mbmi->skip_txfm;
1599     }
1600   }
1601   // Update RD and mbmi stats for selected motion mode
1602   mbmi->ref_frame[1] = ref_frame_1;
1603   *rate_mv = best_rate_mv;
1604   if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
1605     av1_invalid_rd_stats(rd_stats);
1606     restore_dst_buf(xd, *orig_dst, num_planes);
1607     return INT64_MAX;
1608   }
1609   *mbmi = best_mbmi;
1610   *rd_stats = best_rd_stats;
1611   *rd_stats_y = best_rd_stats_y;
1612   if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
1613   memcpy(txfm_info->blk_skip, best_blk_skip,
1614          sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
1615   av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
1616   txfm_info->skip_txfm = best_xskip_txfm;
1617 
1618   restore_dst_buf(xd, *orig_dst, num_planes);
1619   return 0;
1620 }
1621 
// Estimates the RD cost of coding this block in skip mode: build the inter
// predictor, accumulate the prediction SSE over all planes, and charge only
// the skip-mode flag as rate (no coefficients are coded).
static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
                            MACROBLOCK *const x, BLOCK_SIZE bsize,
                            const BUFFER_SET *const orig_dst) {
  assert(bsize < BLOCK_SIZES_ALL);
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;

  // Generate the inter prediction for every plane at the current mi position.
  av1_enc_build_inter_predictor(cm, xd, xd->mi_row, xd->mi_col, orig_dst,
                                bsize, 0, av1_num_planes(cm) - 1);

  int64_t total_sse = 0;
  for (int plane = 0; plane < num_planes; ++plane) {
    const struct macroblock_plane *const p = &x->plane[plane];
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const BLOCK_SIZE plane_bsize =
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
    const int bw = block_size_wide[plane_bsize];
    const int bh = block_size_high[plane_bsize];

    // Compute the residual, then its sum of squares. The << 4 scales the
    // pixel-domain SSE into the distortion domain used by RDCOST.
    av1_subtract_plane(x, plane_bsize, plane);
    total_sse += aom_sum_squares_2d_i16(p->src_diff, bw, bw, bh) << 4;
  }

  // Skip mode codes no residual: distortion equals the prediction SSE and
  // the rate is just the cost of signaling the skip-mode flag.
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
  rd_stats->sse = total_sse;
  rd_stats->dist = total_sse;
  rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];
  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1655 
// Check whether a NEARESTMV, NEARMV, or GLOBALMV ref mv duplicates another
// candidate, so that the redundant mode can be skipped.
check_repeat_ref_mv(const MB_MODE_INFO_EXT * mbmi_ext,int ref_idx,const MV_REFERENCE_FRAME * ref_frame,PREDICTION_MODE single_mode)1658 static INLINE int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
1659                                       int ref_idx,
1660                                       const MV_REFERENCE_FRAME *ref_frame,
1661                                       PREDICTION_MODE single_mode) {
1662   const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1663   const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1664   assert(single_mode != NEWMV);
1665   if (single_mode == NEARESTMV) {
1666     return 0;
1667   } else if (single_mode == NEARMV) {
1668     // when ref_mv_count = 0, NEARESTMV and NEARMV are same as GLOBALMV
1669     // when ref_mv_count = 1, NEARMV is same as GLOBALMV
1670     if (ref_mv_count < 2) return 1;
1671   } else if (single_mode == GLOBALMV) {
1672     // when ref_mv_count == 0, GLOBALMV is same as NEARESTMV
1673     if (ref_mv_count == 0) return 1;
1674     // when ref_mv_count == 1, NEARMV is same as GLOBALMV
1675     else if (ref_mv_count == 1)
1676       return 0;
1677 
1678     int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
1679     // Check GLOBALMV is matching with any mv in ref_mv_stack
1680     for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
1681       int_mv this_mv;
1682 
1683       if (ref_idx == 0)
1684         this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
1685       else
1686         this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
1687 
1688       if (this_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
1689         return 1;
1690     }
1691   }
1692   return 0;
1693 }
1694 
get_this_mv(int_mv * this_mv,PREDICTION_MODE this_mode,int ref_idx,int ref_mv_idx,int skip_repeated_ref_mv,const MV_REFERENCE_FRAME * ref_frame,const MB_MODE_INFO_EXT * mbmi_ext)1695 static INLINE int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
1696                               int ref_idx, int ref_mv_idx,
1697                               int skip_repeated_ref_mv,
1698                               const MV_REFERENCE_FRAME *ref_frame,
1699                               const MB_MODE_INFO_EXT *mbmi_ext) {
1700   const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1701   assert(is_inter_singleref_mode(single_mode));
1702   if (single_mode == NEWMV) {
1703     this_mv->as_int = INVALID_MV;
1704   } else if (single_mode == GLOBALMV) {
1705     if (skip_repeated_ref_mv &&
1706         check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1707       return 0;
1708     *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1709   } else {
1710     assert(single_mode == NEARMV || single_mode == NEARESTMV);
1711     const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1712     const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
1713     if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
1714       assert(ref_mv_offset >= 0);
1715       if (ref_idx == 0) {
1716         *this_mv =
1717             mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
1718       } else {
1719         *this_mv =
1720             mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
1721       }
1722     } else {
1723       if (skip_repeated_ref_mv &&
1724           check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1725         return 0;
1726       *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1727     }
1728   }
1729   return 1;
1730 }
1731 
// Builds the motion vectors for the current prediction mode; non-NEWMV
// components are clamped to the frame border and validated.
build_cur_mv(int_mv * cur_mv,PREDICTION_MODE this_mode,const AV1_COMMON * cm,const MACROBLOCK * x,int skip_repeated_ref_mv)1733 static INLINE int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
1734                                const AV1_COMMON *cm, const MACROBLOCK *x,
1735                                int skip_repeated_ref_mv) {
1736   const MACROBLOCKD *xd = &x->e_mbd;
1737   const MB_MODE_INFO *mbmi = xd->mi[0];
1738   const int is_comp_pred = has_second_ref(mbmi);
1739 
1740   int ret = 1;
1741   for (int i = 0; i < is_comp_pred + 1; ++i) {
1742     int_mv this_mv;
1743     this_mv.as_int = INVALID_MV;
1744     ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
1745                       skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
1746     if (!ret) return 0;
1747     const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
1748     if (single_mode == NEWMV) {
1749       const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1750       cur_mv[i] =
1751           (i == 0) ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
1752                          .this_mv
1753                    : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
1754                          .comp_mv;
1755     } else {
1756       ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
1757     }
1758   }
1759   return ret;
1760 }
1761 
get_drl_cost(const MB_MODE_INFO * mbmi,const MB_MODE_INFO_EXT * mbmi_ext,const int (* const drl_mode_cost0)[2],int8_t ref_frame_type)1762 static INLINE int get_drl_cost(const MB_MODE_INFO *mbmi,
1763                                const MB_MODE_INFO_EXT *mbmi_ext,
1764                                const int (*const drl_mode_cost0)[2],
1765                                int8_t ref_frame_type) {
1766   int cost = 0;
1767   if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
1768     for (int idx = 0; idx < 2; ++idx) {
1769       if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1770         uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1771         cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
1772         if (mbmi->ref_mv_idx == idx) return cost;
1773       }
1774     }
1775     return cost;
1776   }
1777 
1778   if (have_nearmv_in_inter_mode(mbmi->mode)) {
1779     for (int idx = 1; idx < 3; ++idx) {
1780       if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1781         uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1782         cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
1783         if (mbmi->ref_mv_idx == (idx - 1)) return cost;
1784       }
1785     }
1786     return cost;
1787   }
1788   return cost;
1789 }
1790 
is_single_newmv_valid(const HandleInterModeArgs * const args,const MB_MODE_INFO * const mbmi,PREDICTION_MODE this_mode)1791 static INLINE int is_single_newmv_valid(const HandleInterModeArgs *const args,
1792                                         const MB_MODE_INFO *const mbmi,
1793                                         PREDICTION_MODE this_mode) {
1794   for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
1795     const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1796     const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
1797     if (single_mode == NEWMV &&
1798         args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
1799       return 0;
1800     }
1801   }
1802   return 1;
1803 }
1804 
// Returns how many DRL (dynamic reference list) indices should be searched
// for the given mode and reference combination.
static int get_drl_refmv_count(const MACROBLOCK *const x,
                               const MV_REFERENCE_FRAME *ref_frame,
                               PREDICTION_MODE mode) {
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
  const int is_newmv = (mode == NEWMV || mode == NEW_NEWMV);

  // DRL signaling only exists when there are spare candidates: more than two
  // for NEAR-style modes, more than one for pure NEWMV modes.
  if (!((has_nearmv && ref_mv_count > 2) || (is_newmv && ref_mv_count > 1)))
    return 1;
  return AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv);
}
1820 
1821 // Checks if particular ref_mv_idx should be pruned.
prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,const int qindex,const int ref_mv_idx)1822 static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
1823                                          const int qindex,
1824                                          const int ref_mv_idx) {
1825   if (reduce_inter_modes >= 3) return 1;
1826   // Q-index logic based pruning is enabled only for
1827   // reduce_inter_modes = 2.
1828   assert(reduce_inter_modes == 2);
1829   // When reduce_inter_modes=2, pruning happens as below based on q index.
1830   // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
1831   // For q index range between 86 and 170: prune if ref_mv_idx == 2.
1832   // For q index range between 171 and 255: no pruning.
1833   const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
1834   return (ref_mv_idx >= min_prune_ref_mv_idx);
1835 }
1836 
1837 // Whether this reference motion vector can be skipped, based on initial
1838 // heuristics.
static bool ref_mv_idx_early_breakout(
    const SPEED_FEATURES *const sf,
    const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
    const HandleInterModeArgs *const args, int64_t ref_best_rd,
    int ref_mv_idx) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const int is_comp_pred = has_second_ref(mbmi);
  if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
    // When either reference is LAST2/LAST3, skip extra MV candidates whose
    // stack weight falls below REF_CAT_LEVEL.
    if (mbmi->ref_frame[0] == LAST2_FRAME ||
        mbmi->ref_frame[0] == LAST3_FRAME ||
        mbmi->ref_frame[1] == LAST2_FRAME ||
        mbmi->ref_frame[1] == LAST3_FRAME) {
      const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
      if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
          REF_CAT_LEVEL) {
        return true;
      }
    }
    // TODO(any): Experiment with reduce_inter_modes for compound prediction
    // For single-reference NEWMV modes on references other than the nearest
    // past/future ones, apply q-index dependent pruning of low-weight
    // candidates.
    if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
        have_newmv_in_inter_mode(mbmi->mode)) {
      if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
          mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
        const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
        const int do_prune = prune_ref_mv_idx_using_qindex(
            sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
        if (do_prune &&
            (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
             REF_CAT_LEVEL)) {
          return true;
        }
      }
    }
  }

  mbmi->ref_mv_idx = ref_mv_idx;
  // A compound candidate is only usable when both of its single-reference
  // NEWMV searches produced valid results.
  if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
    return true;
  }
  // Estimate a lower bound on this candidate's rate (reference signaling plus
  // DRL cost) and drop it if even that exceeds the best RD found so far.
  // NEAREST modes are always kept past this check.
  size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost = get_drl_cost(
      mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
  est_rd_rate += drl_cost;
  if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
      mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
    return true;
  }
  return false;
}
1891 
1892 // Compute the estimated RD cost for the motion vector with simple translation.
static int64_t simple_translation_pred_rd(
    AV1_COMP *const cpi, MACROBLOCK *x, RD_STATS *rd_stats,
    HandleInterModeArgs *args, int ref_mv_idx, inter_mode_info *mode_info,
    int64_t ref_best_rd, BLOCK_SIZE bsize) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
  const AV1_COMMON *cm = &cpi->common;
  const int is_comp_pred = has_second_ref(mbmi);
  const ModeCosts *mode_costs = &x->mode_costs;

  // Capture the current destination buffers; the predictor below is built
  // into them.
  struct macroblockd_plane *p = xd->plane;
  const BUFFER_SET orig_dst = {
    { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
    { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
  };
  av1_init_rd_stats(rd_stats);

  // Use the simplest compound configuration (plain average, no wedge or
  // masked compound) and drop any inter-intra second reference so that the
  // estimate stays cheap.
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->comp_group_idx = 0;
  mbmi->compound_idx = 1;
  if (mbmi->ref_frame[1] == INTRA_FRAME) {
    mbmi->ref_frame[1] = NONE_FRAME;
  }
  int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);

  mbmi->num_proj_ref = 0;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->ref_mv_idx = ref_mv_idx;

  // Rate terms that do not depend on the residual: reference-frame signaling
  // and the DRL index. The DRL cost is cached in mode_info for later reuse.
  rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
  const int drl_cost =
      get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
  rd_stats->rate += drl_cost;
  mode_info[ref_mv_idx].drl_cost = drl_cost;

  int_mv cur_mv[2];
  if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
    return INT64_MAX;
  }
  assert(have_nearmv_in_inter_mode(mbmi->mode));
  for (int i = 0; i < is_comp_pred + 1; ++i) {
    mbmi->mv[i].as_int = cur_mv[i].as_int;
  }
  const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
  rd_stats->rate += ref_mv_cost;

  // Early exit if the rate alone already exceeds the best RD cost so far.
  if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd) {
    return INT64_MAX;
  }

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->num_proj_ref = 0;
  if (is_comp_pred) {
    // Only compound_average
    mbmi->interinter_comp.type = COMPOUND_AVERAGE;
    mbmi->comp_group_idx = 0;
    mbmi->compound_idx = 1;
  }
  set_default_interp_filters(mbmi, cm->features.interp_filter);

  // Build only the luma predictor and estimate rate/distortion with the
  // curve-fit model instead of running a full transform search.
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
                                AOM_PLANE_Y, AOM_PLANE_Y);
  int est_rate;
  int64_t est_dist;
  model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
                                  NULL, NULL, NULL, NULL, NULL);
  return RDCOST(x->rdmult, rd_stats->rate + est_rate, est_dist);
}
1966 
// Represents a set of integers, from 0 to sizeof(int) * 8 - 1, as bits in
// an integer. 0 for the i-th bit means that integer is excluded, 1 means
// it is included.
mask_set_bit(int * mask,int index)1970 static INLINE void mask_set_bit(int *mask, int index) { *mask |= (1 << index); }
1971 
mask_check_bit(int mask,int index)1972 static INLINE bool mask_check_bit(int mask, int index) {
1973   return (mask >> index) & 0x1;
1974 }
1975 
1976 // Before performing the full MV search in handle_inter_mode, do a simple
1977 // translation search and see if we can eliminate any motion vectors.
1978 // Returns an integer where, if the i-th bit is set, it means that the i-th
1979 // motion vector should be searched. This is only set for NEAR_MV.
static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
                                RD_STATS *rd_stats,
                                HandleInterModeArgs *const args,
                                int64_t ref_best_rd, inter_mode_info *mode_info,
                                BLOCK_SIZE bsize, const int ref_set) {
  AV1_COMMON *const cm = &cpi->common;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const PREDICTION_MODE this_mode = mbmi->mode;

  // Only search indices if they have some chance of being good.
  // First pass: cheap heuristics (ref_mv_idx_early_breakout) build the set
  // of candidate indices worth considering at all.
  int good_indices = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args,
                                  ref_best_rd, i)) {
      continue;
    }
    mask_set_bit(&good_indices, i);
  }

  // Only prune in NEARMV mode, if the speed feature is set, and the block size
  // is large enough. If these conditions are not met, return all good indices
  // found so far.
  if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
    return good_indices;
  if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
  if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
  // Do not prune when there is internal resizing. TODO(elliottk) fix this
  // so b/2384 can be resolved.
  if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
      (mbmi->ref_frame[1] > 0 &&
       av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
    return good_indices;
  }

  // Calculate the RD cost for the motion vectors using simple translation.
  // Second pass: estimate each surviving index's RD with a cheap
  // simple-translation prediction.
  int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
  for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
    // If this index is bad, ignore it.
    if (!mask_check_bit(good_indices, ref_mv_idx)) {
      continue;
    }
    idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
        cpi, x, rd_stats, args, ref_mv_idx, mode_info, ref_best_rd, bsize);
  }
  // Find the index with the best RD cost.
  int best_idx = 0;
  for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) {
    if (idx_rdcost[i] < idx_rdcost[best_idx]) {
      best_idx = i;
    }
  }
  // Only include indices that are good and within a % of the best.
  // Compound prediction tolerates a wider spread (5%) than single
  // prediction (0.1%).
  const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
  // If the simple translation cost is not within this multiple of the
  // best RD, skip it. Note that the cutoff is derived experimentally.
  const double ref_dth = 5;
  // Final pass: keep index i only if it is good, within dth of the best
  // estimated index, and within ref_dth of the overall best RD.
  int result = 0;
  for (int i = 0; i < ref_set; ++i) {
    if (mask_check_bit(good_indices, i) &&
        (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth &&
        (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) {
      mask_set_bit(&result, i);
    }
  }
  return result;
}
2047 
2048 /*!\brief Motion mode information for inter mode search speedup.
2049  *
2050  * Used in a speed feature to search motion modes other than
2051  * SIMPLE_TRANSLATION only on winning candidates.
2052  */
typedef struct motion_mode_candidate {
  /*!
   * Mode info for the motion mode candidate.
   */
  MB_MODE_INFO mbmi;
  /*!
   * Rate describing the cost of the motion vectors for this candidate.
   */
  int rate_mv;
  /*!
   * Rate before motion mode search and transform coding is applied.
   */
  int rate2_nocoeff;
  /*!
   * Either 0 or 1. When 1, the motion mode search is skipped for this
   * candidate and SIMPLE_TRANSLATION is used instead (a speed feature).
   */
  int skip_motion_mode;
  /*!
   * Total RD cost for this candidate.
   */
  int64_t rd_cost;
} motion_mode_candidate;
2077 
2078 /*!\cond */
typedef struct motion_mode_best_st_candidate {
  // Winner candidates from the SIMPLE_TRANSLATION mode search, to be
  // revisited later with other motion modes (see motion_mode_candidate).
  motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
  // Number of valid entries in motion_mode_cand.
  int num_motion_mode_cand;
} motion_mode_best_st_candidate;
2083 
2084 // Checks if the current reference frame matches with neighbouring block's
2085 // (top/left) reference frames
ref_match_found_in_nb_blocks(MB_MODE_INFO * cur_mbmi,MB_MODE_INFO * nb_mbmi)2086 static AOM_INLINE int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
2087                                                    MB_MODE_INFO *nb_mbmi) {
2088   MV_REFERENCE_FRAME nb_ref_frames[2] = { nb_mbmi->ref_frame[0],
2089                                           nb_mbmi->ref_frame[1] };
2090   MV_REFERENCE_FRAME cur_ref_frames[2] = { cur_mbmi->ref_frame[0],
2091                                            cur_mbmi->ref_frame[1] };
2092   const int is_cur_comp_pred = has_second_ref(cur_mbmi);
2093   int match_found = 0;
2094 
2095   for (int i = 0; i < (is_cur_comp_pred + 1); i++) {
2096     if ((cur_ref_frames[i] == nb_ref_frames[0]) ||
2097         (cur_ref_frames[i] == nb_ref_frames[1]))
2098       match_found = 1;
2099   }
2100   return match_found;
2101 }
2102 
find_ref_match_in_above_nbs(const int total_mi_cols,MACROBLOCKD * xd)2103 static AOM_INLINE int find_ref_match_in_above_nbs(const int total_mi_cols,
2104                                                   MACROBLOCKD *xd) {
2105   if (!xd->up_available) return 1;
2106   const int mi_col = xd->mi_col;
2107   MB_MODE_INFO **cur_mbmi = xd->mi;
2108   // prev_row_mi points into the mi array, starting at the beginning of the
2109   // previous row.
2110   MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
2111   const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
2112   uint8_t mi_step;
2113   for (int above_mi_col = mi_col; above_mi_col < end_col;
2114        above_mi_col += mi_step) {
2115     MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
2116     mi_step = mi_size_wide[above_mi[0]->bsize];
2117     int match_found = 0;
2118     if (is_inter_block(*above_mi))
2119       match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi);
2120     if (match_found) return 1;
2121   }
2122   return 0;
2123 }
2124 
find_ref_match_in_left_nbs(const int total_mi_rows,MACROBLOCKD * xd)2125 static AOM_INLINE int find_ref_match_in_left_nbs(const int total_mi_rows,
2126                                                  MACROBLOCKD *xd) {
2127   if (!xd->left_available) return 1;
2128   const int mi_row = xd->mi_row;
2129   MB_MODE_INFO **cur_mbmi = xd->mi;
2130   // prev_col_mi points into the mi array, starting at the top of the
2131   // previous column
2132   MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
2133   const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
2134   uint8_t mi_step;
2135   for (int left_mi_row = mi_row; left_mi_row < end_row;
2136        left_mi_row += mi_step) {
2137     MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
2138     mi_step = mi_size_high[left_mi[0]->bsize];
2139     int match_found = 0;
2140     if (is_inter_block(*left_mi))
2141       match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi);
2142     if (match_found) return 1;
2143   }
2144   return 0;
2145 }
2146 /*!\endcond */
2147 
/*! \brief Struct used to hold TPL data to
 * narrow down parts of the inter mode search.
 */
typedef struct {
  /*!
   * The best inter cost out of all of the reference frames.
   */
  int64_t best_inter_cost;
  /*!
   * The inter cost for each reference frame, indexed by reference frame
   * minus one (see its use in prune_modes_based_on_tpl_stats).
   */
  int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
} PruneInfoFromTpl;
2161 
2162 #if !CONFIG_REALTIME_ONLY
2163 // TODO(Remya): Check if get_tpl_stats_b() can be reused
get_block_level_tpl_stats(AV1_COMP * cpi,BLOCK_SIZE bsize,int mi_row,int mi_col,int * valid_refs,PruneInfoFromTpl * inter_cost_info_from_tpl)2164 static AOM_INLINE void get_block_level_tpl_stats(
2165     AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
2166     PruneInfoFromTpl *inter_cost_info_from_tpl) {
2167   const GF_GROUP *const gf_group = &cpi->gf_group;
2168   AV1_COMMON *const cm = &cpi->common;
2169 
2170   assert(IMPLIES(gf_group->size > 0, gf_group->index < gf_group->size));
2171   const int tpl_idx = gf_group->index;
2172   TplParams *const tpl_data = &cpi->tpl_data;
2173   const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
2174   if (tpl_idx >= MAX_TPL_FRAME_IDX || !tpl_frame->is_valid) {
2175     return;
2176   }
2177 
2178   const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
2179   const int mi_wide = mi_size_wide[bsize];
2180   const int mi_high = mi_size_high[bsize];
2181   const int tpl_stride = tpl_frame->stride;
2182   const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
2183   const int mi_col_sr =
2184       coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
2185   const int mi_col_end_sr =
2186       coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
2187   const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);
2188 
2189   const int row_step = step;
2190   const int col_step_sr =
2191       coded_to_superres_mi(step, cm->superres_scale_denominator);
2192   for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
2193        row += row_step) {
2194     for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
2195          col += col_step_sr) {
2196       const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
2197           row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];
2198 
2199       // Sums up the inter cost of corresponding ref frames
2200       for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
2201         inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
2202             this_stats->pred_error[ref_idx];
2203       }
2204     }
2205   }
2206 
2207   // Computes the best inter cost (minimum inter_cost)
2208   int64_t best_inter_cost = INT64_MAX;
2209   for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
2210     const int64_t cur_inter_cost =
2211         inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
2212     // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
2213     // calculating the minimum inter_cost
2214     if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
2215         valid_refs[ref_idx])
2216       best_inter_cost = cur_inter_cost;
2217   }
2218   inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
2219 }
2220 #endif
2221 
prune_modes_based_on_tpl_stats(PruneInfoFromTpl * inter_cost_info_from_tpl,const int * refs,int ref_mv_idx,const PREDICTION_MODE this_mode,int prune_mode_level)2222 static AOM_INLINE int prune_modes_based_on_tpl_stats(
2223     PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
2224     const PREDICTION_MODE this_mode, int prune_mode_level) {
2225   const int have_newmv = have_newmv_in_inter_mode(this_mode);
2226   if ((prune_mode_level < 2) && have_newmv) return 0;
2227 
2228   const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
2229   if (best_inter_cost == INT64_MAX) return 0;
2230 
2231   const int prune_level = prune_mode_level - 1;
2232   int64_t cur_inter_cost;
2233 
2234   const int is_globalmv =
2235       (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
2236   const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;
2237 
2238   // Thresholds used for pruning:
2239   // Lower value indicates aggressive pruning and higher value indicates
2240   // conservative pruning which is set based on ref_mv_idx and speed feature.
2241   // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2. prune_index
2242   // 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV
2243   static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
2244     { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
2245   };
2246 
2247   const int is_comp_pred = (refs[1] > INTRA_FRAME);
2248   if (!is_comp_pred) {
2249     cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2250   } else {
2251     const int64_t inter_cost_ref0 =
2252         inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2253     const int64_t inter_cost_ref1 =
2254         inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
2255     // Choose maximum inter_cost among inter_cost_ref0 and inter_cost_ref1 for
2256     // more aggressive pruning
2257     cur_inter_cost = AOMMAX(inter_cost_ref0, inter_cost_ref1);
2258   }
2259 
2260   // Prune the mode if cur_inter_cost is greater than threshold times
2261   // best_inter_cost
2262   if (cur_inter_cost >
2263       ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
2264         best_inter_cost) >>
2265        2))
2266     return 1;
2267   return 0;
2268 }
2269 
// If the current mode being searched is NEWMV, this function will look
// at previously searched MVs and check if they are the same
// as the current MV. If it finds that this MV is repeated, it compares
// the cost to the previous MV and skips the rest of the search if it is
// more expensive.
// Returns 1 when the rest of the search for this mode can be skipped
// (winner stats have already been stored), 0 otherwise.
static int skip_repeated_newmv(
    AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
    const int do_tx_search, const PREDICTION_MODE this_mode,
    MB_MODE_INFO *best_mbmi, motion_mode_candidate *motion_mode_cand,
    int64_t *ref_best_rd, RD_STATS *best_rd_stats, RD_STATS *best_rd_stats_y,
    RD_STATS *best_rd_stats_uv, inter_mode_info *mode_info,
    HandleInterModeArgs *args, int drl_cost, const int *refs, int_mv *cur_mv,
    int64_t *best_rd, const BUFFER_SET orig_dst, int ref_mv_idx) {
  // This feature only works for NEWMV when a previous mv has been searched
  if (this_mode != NEWMV || ref_mv_idx == 0) return 0;
  MACROBLOCKD *xd = &x->e_mbd;
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);

  int skip = 0;
  int this_rate_mv = 0;
  // 'i' intentionally remains in scope after the loop: the skip path below
  // reuses it as the index of the matching previous ref_mv result.
  int i;
  for (i = 0; i < ref_mv_idx; ++i) {
    // Check if the motion search result same as previous results
    if (cur_mv[0].as_int == args->single_newmv[i][refs[0]].as_int &&
        args->single_newmv_valid[i][refs[0]]) {
      // If the compared mode has no valid rd, it is unlikely this
      // mode will be the best mode
      if (mode_info[i].rd == INT64_MAX) {
        skip = 1;
        break;
      }
      // Compare the cost difference including drl cost and mv cost
      if (mode_info[i].mv.as_int != INVALID_MV) {
        const int compare_cost = mode_info[i].rate_mv + mode_info[i].drl_cost;
        const int_mv ref_mv = av1_get_ref_mv(x, 0);
        // Rate of coding the repeated MV against the current index's ref MV.
        this_rate_mv = av1_mv_bit_cost(
            &mode_info[i].mv.as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
            x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
        const int this_cost = this_rate_mv + drl_cost;

        if (compare_cost <= this_cost) {
          // Skip this mode if it is more expensive as the previous result
          // for this MV
          skip = 1;
          break;
        } else {
          // If the cost is less than current best result, make this
          // the best and update corresponding variables unless the
          // best_mv is the same as ref_mv. In this case we skip and
          // rely on NEAR(EST)MV instead
          if (best_mbmi->ref_mv_idx == i &&
              best_mbmi->mv[0].as_int != ref_mv.as_int) {
            assert(*best_rd != INT64_MAX);
            assert(best_mbmi->mv[0].as_int == mode_info[i].mv.as_int);
            best_mbmi->ref_mv_idx = ref_mv_idx;
            motion_mode_cand->rate_mv = this_rate_mv;
            best_rd_stats->rate += this_cost - compare_cost;
            *best_rd =
                RDCOST(x->rdmult, best_rd_stats->rate, best_rd_stats->dist);
            // We also need to update mode_info here because we are setting
            // (ref_)best_rd here. So we will not be able to search the same
            // mode again with the current configuration.
            mode_info[ref_mv_idx].mv.as_int = best_mbmi->mv[0].as_int;
            mode_info[ref_mv_idx].rate_mv = this_rate_mv;
            mode_info[ref_mv_idx].rd = *best_rd;
            if (*best_rd < *ref_best_rd) *ref_best_rd = *best_rd;
            break;
          }
        }
      }
    }
  }
  if (skip) {
    const THR_MODES mode_enum = get_prediction_mode_idx(
        best_mbmi->mode, best_mbmi->ref_frame[0], best_mbmi->ref_frame[1]);
    // Collect mode stats for multiwinner mode processing
    store_winner_mode_stats(
        &cpi->common, x, best_mbmi, best_rd_stats, best_rd_stats_y,
        best_rd_stats_uv, mode_enum, NULL, bsize, *best_rd,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
    // Carry the matching index 'i's RD results over to this ref_mv_idx so
    // later passes treat this mode as already evaluated.
    args->modelled_rd[this_mode][ref_mv_idx][refs[0]] =
        args->modelled_rd[this_mode][i][refs[0]];
    args->simple_rd[this_mode][ref_mv_idx][refs[0]] =
        args->simple_rd[this_mode][i][refs[0]];
    mode_info[ref_mv_idx].rd = mode_info[i].rd;
    mode_info[ref_mv_idx].rate_mv = this_rate_mv;
    mode_info[ref_mv_idx].mv.as_int = mode_info[i].mv.as_int;

    restore_dst_buf(xd, orig_dst, num_planes);
    return 1;
  }
  return 0;
}
2364 
2365 /*!\brief High level function to select parameters for compound mode.
2366  *
2367  * \ingroup inter_mode_search
2368  * The main search functionality is done in the call to av1_compound_type_rd().
2369  *
2370  * \param[in]     cpi               Top-level encoder structure.
2371  * \param[in]     x                 Pointer to struct holding all the data for
2372  *                                  the current macroblock.
2373  * \param[in]     args              HandleInterModeArgs struct holding
2374  *                                  miscellaneous arguments for inter mode
2375  *                                  search. See the documentation for this
2376  *                                  struct for a description of each member.
2377  * \param[in]     ref_best_rd       Best RD found so far for this block.
2378  *                                  It is used for early termination of this
2379  *                                  search if the RD exceeds this value.
2380  * \param[in,out] cur_mv            Current motion vector.
2381  * \param[in]     bsize             Current block size.
2382  * \param[in,out] compmode_interinter_cost  RD of the selected interinter
2383                                     compound mode.
2384  * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2385  *                                  allocated buffers for the compound
2386  *                                  predictors and masks in the compound type
2387  *                                  search.
2388  * \param[in,out] orig_dst          A prediction buffer to hold a computed
2389  *                                  prediction. This will eventually hold the
2390  *                                  final prediction, and the tmp_dst info will
2391  *                                  be copied here.
2392  * \param[in]     tmp_dst           A temporary prediction buffer to hold a
2393  *                                  computed prediction.
2394  * \param[in,out] rate_mv           The rate associated with the motion vectors.
2395  *                                  This will be modified if a motion search is
2396  *                                  done in the motion mode search.
2397  * \param[in,out] rd_stats          Struct to keep track of the overall RD
2398  *                                  information.
2399  * \param[in,out] skip_rd           An array of length 2 where skip_rd[0] is the
2400  *                                  best total RD for a skip mode so far, and
2401  *                                  skip_rd[1] is the best RD for a skip mode so
2402  *                                  far in luma. This is used as a speed feature
2403  *                                  to skip the transform search if the computed
2404  *                                  skip RD for the current mode is not better
2405  *                                  than the best skip_rd so far.
2406  * \param[in,out] skip_build_pred   Indicates whether or not to build the inter
2407  *                                  predictor. If this is 0, the inter predictor
2408  *                                  has already been built and thus we can avoid
2409  *                                  repeating computation.
2410  * \return Returns 1 if this mode is worse than one already seen and 0 if it is
2411  * a viable candidate.
2412  */
// Selects the compound type (and its parameters) for the current compound
// inter mode via av1_compound_type_rd(). Returns 1 if the resulting RD is
// clearly worse than ref_best_rd (mode can be dropped), 0 otherwise. See the
// doxygen comment above for a description of each parameter.
static int process_compound_inter_mode(
    AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
    int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
    RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;

  // Masked compound modes are allowed only when both the block size and the
  // sequence header permit them.
  const int masked_compound_used = is_any_masked_compound_used(bsize) &&
                                   cm->seq_params.enable_masked_compound;
  const int mode_search_mask = (1 << COMPOUND_AVERAGE) |
                               (1 << COMPOUND_DISTWTD) |
                               (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);

  set_default_interp_filters(mbmi, cm->features.interp_filter);

  // Early-termination threshold derived from the best RD seen so far.
  const int64_t rd_thresh =
      get_rd_thresh_from_best_rd(ref_best_rd, (1 << COMP_TYPE_RD_THRESH_SHIFT),
                                 COMP_TYPE_RD_THRESH_SCALE);

  // Search the compound type and any type-specific parameters (for example,
  // the mask parameters of the masked modes) and compute the RD.
  int is_luma_interp_done = 0;
  int64_t best_rd_compound;
  *compmode_interinter_cost = av1_compound_type_rd(
      cpi, x, args, bsize, cur_mv, mode_search_mask, masked_compound_used,
      orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
      ref_best_rd, skip_rd[1], &is_luma_interp_done, rd_thresh);
  // Drop this mode when its compound RD is clearly worse than the best RD
  // found so far.
  if (ref_best_rd < INT64_MAX &&
      (best_rd_compound >> COMP_TYPE_RD_THRESH_SHIFT) *
              COMP_TYPE_RD_THRESH_SCALE >
          ref_best_rd) {
    restore_dst_buf(xd, *orig_dst, num_planes);
    return 1;
  }

  // Build only the uv predictor for COMPOUND_AVERAGE.
  // Note there is no need to call av1_enc_build_inter_predictor for luma if
  // COMPOUND_AVERAGE is selected because it is the first candidate in
  // av1_compound_type_rd, which means it used the dst_buf rather than the
  // tmp_buf.
  if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && is_luma_interp_done) {
    if (num_planes > 1) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                    AOM_PLANE_U, num_planes - 1);
    }
    *skip_build_pred = 1;
  }
  return 0;
}
2467 
2468 // Speed feature to prune out MVs that are similar to previous MVs if they
2469 // don't achieve the best RD advantage.
prune_ref_mv_idx_search(int ref_mv_idx,int best_ref_mv_idx,int_mv save_mv[MAX_REF_MV_SEARCH-1][2],MB_MODE_INFO * mbmi,int pruning_factor)2470 static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
2471                                    int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
2472                                    MB_MODE_INFO *mbmi, int pruning_factor) {
2473   int i;
2474   const int is_comp_pred = has_second_ref(mbmi);
2475   const int thr = (1 + is_comp_pred) << (pruning_factor + 1);
2476 
2477   // Skip the evaluation if an MV match is found.
2478   if (ref_mv_idx > 0) {
2479     for (int idx = 0; idx < ref_mv_idx; ++idx) {
2480       if (save_mv[idx][0].as_int == INVALID_MV) continue;
2481 
2482       int mv_diff = 0;
2483       for (i = 0; i < 1 + is_comp_pred; ++i) {
2484         mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
2485                    abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
2486       }
2487 
2488       // If this mode is not the best one, and current MV is similar to
2489       // previous stored MV, terminate this ref_mv_idx evaluation.
2490       if (best_ref_mv_idx == -1 && mv_diff <= thr) return 1;
2491     }
2492   }
2493 
2494   if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
2495     for (i = 0; i < is_comp_pred + 1; ++i)
2496       save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
2497   }
2498 
2499   return 0;
2500 }
2501 
2502 /*!\brief AV1 inter mode RD computation
2503  *
2504  * \ingroup inter_mode_search
2505  * Do the RD search for a given inter mode and compute all information relevant
2506  * to the input mode. It will compute the best MV,
2507  * compound parameters (if the mode is a compound mode) and interpolation filter
2508  * parameters.
2509  *
2510  * \param[in]     cpi               Top-level encoder structure.
2511  * \param[in]     tile_data         Pointer to struct holding adaptive
2512  *                                  data/contexts/models for the tile during
2513  *                                  encoding.
2514  * \param[in]     x                 Pointer to structure holding all the data
2515  *                                  for the current macroblock.
2516  * \param[in]     bsize             Current block size.
2517  * \param[in,out] rd_stats          Struct to keep track of the overall RD
2518  *                                  information.
2519  * \param[in,out] rd_stats_y        Struct to keep track of the RD information
2520  *                                  for only the Y plane.
2521  * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
2522  *                                  for only the UV planes.
2523  * \param[in]     args              HandleInterModeArgs struct holding
2524  *                                  miscellaneous arguments for inter mode
2525  *                                  search. See the documentation for this
2526  *                                  struct for a description of each member.
2527  * \param[in]     ref_best_rd       Best RD found so far for this block.
2528  *                                  It is used for early termination of this
2529  *                                  search if the RD exceeds this value.
2530  * \param[in]     tmp_buf           Temporary buffer used to hold predictors
2531  *                                  built in this search.
2532  * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2533  *                                  allocated buffers for the compound
2534  *                                  predictors and masks in the compound type
2535  *                                  search.
2536  * \param[in,out] best_est_rd       Estimated RD for motion mode search if
2537  *                                  do_tx_search (see below) is 0.
2538  * \param[in]     do_tx_search      Parameter to indicate whether or not to do
2539  *                                  a full transform search. This will compute
2540  *                                  an estimated RD for the modes without the
2541  *                                  transform search and later perform the full
2542  *                                  transform search on the best candidates.
2543  * \param[in,out] inter_modes_info  InterModesInfo struct to hold inter mode
2544  *                                  information to perform a full transform
2545  *                                  search only on winning candidates searched
2546  *                                  with an estimate for transform coding RD.
2547  * \param[in,out] motion_mode_cand  A motion_mode_candidate struct to store
2548  *                                  motion mode information used in a speed
2549  *                                  feature to search motion modes other than
2550  *                                  SIMPLE_TRANSLATION only on winning
2551  *                                  candidates.
2552  * \param[in,out] skip_rd           A length 2 array, where skip_rd[0] is the
2553  *                                  best total RD for a skip mode so far, and
2554  *                                  skip_rd[1] is the best RD for a skip mode so
2555  *                                  far in luma. This is used as a speed feature
2556  *                                  to skip the transform search if the computed
2557  *                                  skip RD for the current mode is not better
2558  *                                  than the best skip_rd so far.
2559  * \param[in]     inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
2560  *                                         narrow down the search based on data
2561  *                                         collected in the TPL model.
2562  * \param[out]    yrd               Stores the rdcost corresponding to encoding
2563  *                                  the luma plane.
2564  *
2565  * \return The RD cost for the mode being searched.
2566  */
handle_inter_mode(AV1_COMP * const cpi,TileDataEnc * tile_data,MACROBLOCK * x,BLOCK_SIZE bsize,RD_STATS * rd_stats,RD_STATS * rd_stats_y,RD_STATS * rd_stats_uv,HandleInterModeArgs * args,int64_t ref_best_rd,uint8_t * const tmp_buf,const CompoundTypeRdBuffers * rd_buffers,int64_t * best_est_rd,const int do_tx_search,InterModesInfo * inter_modes_info,motion_mode_candidate * motion_mode_cand,int64_t * skip_rd,PruneInfoFromTpl * inter_cost_info_from_tpl,int64_t * yrd)2567 static int64_t handle_inter_mode(
2568     AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
2569     BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
2570     RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
2571     uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
2572     int64_t *best_est_rd, const int do_tx_search,
2573     InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
2574     int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
2575     int64_t *yrd) {
2576   const AV1_COMMON *cm = &cpi->common;
2577   const int num_planes = av1_num_planes(cm);
2578   MACROBLOCKD *xd = &x->e_mbd;
2579   MB_MODE_INFO *mbmi = xd->mi[0];
2580   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2581   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
2582   const int is_comp_pred = has_second_ref(mbmi);
2583   const PREDICTION_MODE this_mode = mbmi->mode;
2584 
2585   const GF_GROUP *const gf_group = &cpi->gf_group;
2586   const int tpl_idx = gf_group->index;
2587   TplDepFrame *tpl_frame = &cpi->tpl_data.tpl_frame[tpl_idx];
2588   const int prune_modes_based_on_tpl =
2589       cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
2590       tpl_idx < MAX_TPL_FRAME_IDX && tpl_frame->is_valid;
2591   int i;
2592   // Reference frames for this mode
2593   const int refs[2] = { mbmi->ref_frame[0],
2594                         (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2595   int rate_mv = 0;
2596   int64_t rd = INT64_MAX;
2597   // Do first prediction into the destination buffer. Do the next
2598   // prediction into a temporary buffer. Then keep track of which one
2599   // of these currently holds the best predictor, and use the other
2600   // one for future predictions. In the end, copy from tmp_buf to
2601   // dst if necessary.
2602   struct macroblockd_plane *p = xd->plane;
2603   const BUFFER_SET orig_dst = {
2604     { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
2605     { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
2606   };
2607   const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
2608                                  tmp_buf + 2 * MAX_SB_SQUARE },
2609                                { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
2610 
2611   int64_t ret_val = INT64_MAX;
2612   const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2613   RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
2614   int64_t best_rd = INT64_MAX;
2615   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
2616   uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
2617   int64_t best_yrd = INT64_MAX;
2618   MB_MODE_INFO best_mbmi = *mbmi;
2619   int best_xskip_txfm = 0;
2620   int64_t newmv_ret_val = INT64_MAX;
2621   inter_mode_info mode_info[MAX_REF_MV_SEARCH];
2622 
2623   // Do not prune the mode based on inter cost from tpl if the current ref frame
2624   // is the winner ref in neighbouring blocks.
2625   int ref_match_found_in_above_nb = 0;
2626   int ref_match_found_in_left_nb = 0;
2627   if (prune_modes_based_on_tpl) {
2628     ref_match_found_in_above_nb =
2629         find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
2630     ref_match_found_in_left_nb =
2631         find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
2632   }
2633 
2634   // First, perform a simple translation search for each of the indices. If
2635   // an index performs well, it will be fully searched in the main loop
2636   // of this function.
2637   const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
2638   // Save MV results from first 2 ref_mv_idx.
2639   int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
2640   int best_ref_mv_idx = -1;
2641   const int idx_mask = ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd,
2642                                             mode_info, bsize, ref_set);
2643   const int16_t mode_ctx =
2644       av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
2645   const ModeCosts *mode_costs = &x->mode_costs;
2646   const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
2647   const int base_rate =
2648       args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;
2649 
2650   for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
2651     save_mv[i][0].as_int = INVALID_MV;
2652     save_mv[i][1].as_int = INVALID_MV;
2653   }
2654 
2655   // Main loop of this function. This will  iterate over all of the ref mvs
2656   // in the dynamic reference list and do the following:
2657   //    1.) Get the current MV. Create newmv MV if necessary
2658   //    2.) Search compound type and parameters if applicable
2659   //    3.) Do interpolation filter search
2660   //    4.) Build the inter predictor
2661   //    5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
2662   //        WARPED_CAUSAL)
2663   //    6.) Update stats if best so far
2664   for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
2665     mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
2666     mode_info[ref_mv_idx].mv.as_int = INVALID_MV;
2667     mode_info[ref_mv_idx].rd = INT64_MAX;
2668 
2669     if (!mask_check_bit(idx_mask, ref_mv_idx)) {
2670       // MV did not perform well in simple translation search. Skip it.
2671       continue;
2672     }
2673     if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
2674         !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
2675       // Skip mode if TPL model indicates it will not be beneficial.
2676       if (prune_modes_based_on_tpl_stats(
2677               inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
2678               cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
2679         continue;
2680     }
2681     av1_init_rd_stats(rd_stats);
2682 
2683     // Initialize compound mode data
2684     mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2685     mbmi->comp_group_idx = 0;
2686     mbmi->compound_idx = 1;
2687     if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;
2688 
2689     mbmi->num_proj_ref = 0;
2690     mbmi->motion_mode = SIMPLE_TRANSLATION;
2691     mbmi->ref_mv_idx = ref_mv_idx;
2692 
2693     // Compute cost for signalling this DRL index
2694     rd_stats->rate = base_rate;
2695     const int drl_cost = get_drl_cost(
2696         mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
2697     rd_stats->rate += drl_cost;
2698     mode_info[ref_mv_idx].drl_cost = drl_cost;
2699 
2700     int rs = 0;
2701     int compmode_interinter_cost = 0;
2702 
2703     int_mv cur_mv[2];
2704 
2705     // TODO(Cherma): Extend this speed feature to support compound mode
2706     int skip_repeated_ref_mv =
2707         is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
2708     // Generate the current mv according to the prediction mode
2709     if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
2710       continue;
2711     }
2712 
2713     // The above call to build_cur_mv does not handle NEWMV modes. Build
2714     // the mv here if we have NEWMV for any predictors.
2715     if (have_newmv_in_inter_mode(this_mode)) {
2716 #if CONFIG_COLLECT_COMPONENT_TIMING
2717       start_timing(cpi, handle_newmv_time);
2718 #endif
2719       newmv_ret_val =
2720           handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
2721 #if CONFIG_COLLECT_COMPONENT_TIMING
2722       end_timing(cpi, handle_newmv_time);
2723 #endif
2724 
2725       if (newmv_ret_val != 0) continue;
2726 
2727       rd_stats->rate += rate_mv;
2728 
2729       // skip NEWMV mode in drl if the motion search result is the same
2730       // as a previous result
2731       if (cpi->sf.inter_sf.skip_repeated_newmv &&
2732           skip_repeated_newmv(cpi, x, bsize, do_tx_search, this_mode,
2733                               &best_mbmi, motion_mode_cand, &ref_best_rd,
2734                               &best_rd_stats, &best_rd_stats_y,
2735                               &best_rd_stats_uv, mode_info, args, drl_cost,
2736                               refs, cur_mv, &best_rd, orig_dst, ref_mv_idx))
2737         continue;
2738     }
2739     // Copy the motion vector for this mode into mbmi struct
2740     for (i = 0; i < is_comp_pred + 1; ++i) {
2741       mbmi->mv[i].as_int = cur_mv[i].as_int;
2742     }
2743 
2744     if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
2745         mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
2746       continue;
2747     }
2748 
2749     // Skip the rest of the search if prune_ref_mv_idx_search speed feature
2750     // is enabled, and the current MV is similar to a previous one.
2751     if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
2752         prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
2753                                 cpi->sf.inter_sf.prune_ref_mv_idx_search))
2754       continue;
2755 
2756 #if CONFIG_COLLECT_COMPONENT_TIMING
2757     start_timing(cpi, compound_type_rd_time);
2758 #endif
2759     int skip_build_pred = 0;
2760     const int mi_row = xd->mi_row;
2761     const int mi_col = xd->mi_col;
2762 
2763     // Handle a compound predictor, continue if it is determined this
2764     // cannot be the best compound mode
2765     if (is_comp_pred) {
2766       const int not_best_mode = process_compound_inter_mode(
2767           cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
2768           rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
2769           &skip_build_pred);
2770       if (not_best_mode) continue;
2771     }
2772 
2773 #if CONFIG_COLLECT_COMPONENT_TIMING
2774     end_timing(cpi, compound_type_rd_time);
2775 #endif
2776 
2777 #if CONFIG_COLLECT_COMPONENT_TIMING
2778     start_timing(cpi, interpolation_filter_search_time);
2779 #endif
2780     // Determine the interpolation filter for this mode
2781     ret_val = av1_interpolation_filter_search(
2782         x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
2783         &skip_build_pred, args, ref_best_rd);
2784 #if CONFIG_COLLECT_COMPONENT_TIMING
2785     end_timing(cpi, interpolation_filter_search_time);
2786 #endif
2787     if (args->modelled_rd != NULL && !is_comp_pred) {
2788       args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
2789     }
2790     if (ret_val != 0) {
2791       restore_dst_buf(xd, orig_dst, num_planes);
2792       continue;
2793     } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
2794                ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
2795       restore_dst_buf(xd, orig_dst, num_planes);
2796       continue;
2797     }
2798 
2799     // Compute modelled RD if enabled
2800     if (args->modelled_rd != NULL) {
2801       if (is_comp_pred) {
2802         const int mode0 = compound_ref0_mode(this_mode);
2803         const int mode1 = compound_ref1_mode(this_mode);
2804         const int64_t mrd =
2805             AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
2806                    args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
2807         if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
2808           restore_dst_buf(xd, orig_dst, num_planes);
2809           continue;
2810         }
2811       }
2812     }
2813     rd_stats->rate += compmode_interinter_cost;
2814     if (skip_build_pred != 1) {
2815       // Build this inter predictor if it has not been previously built
2816       av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0,
2817                                     av1_num_planes(cm) - 1);
2818     }
2819 
2820 #if CONFIG_COLLECT_COMPONENT_TIMING
2821     start_timing(cpi, motion_mode_rd_time);
2822 #endif
2823     int rate2_nocoeff = rd_stats->rate;
2824     // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
2825     // OBMC_CAUSAL or WARPED_CAUSAL
2826     int64_t this_yrd;
2827     ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
2828                              rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
2829                              &orig_dst, best_est_rd, do_tx_search,
2830                              inter_modes_info, 0, &this_yrd);
2831 #if CONFIG_COLLECT_COMPONENT_TIMING
2832     end_timing(cpi, motion_mode_rd_time);
2833 #endif
2834     assert(
2835         IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));
2836 
2837     if (ret_val != INT64_MAX) {
2838       int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
2839       if (tmp_rd < mode_info[ref_mv_idx].rd) {
2840         // Only update mode_info if the new result is actually better.
2841         mode_info[ref_mv_idx].mv.as_int = mbmi->mv[0].as_int;
2842         mode_info[ref_mv_idx].rate_mv = rate_mv;
2843         mode_info[ref_mv_idx].rd = tmp_rd;
2844       }
2845       const THR_MODES mode_enum = get_prediction_mode_idx(
2846           mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
2847       // Collect mode stats for multiwinner mode processing
2848       store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
2849                               rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
2850                               cpi->sf.winner_mode_sf.multi_winner_mode_type,
2851                               do_tx_search);
2852       if (tmp_rd < best_rd) {
2853         best_yrd = this_yrd;
2854         // Update the best rd stats if we found the best mode so far
2855         best_rd_stats = *rd_stats;
2856         best_rd_stats_y = *rd_stats_y;
2857         best_rd_stats_uv = *rd_stats_uv;
2858         best_rd = tmp_rd;
2859         best_mbmi = *mbmi;
2860         best_xskip_txfm = txfm_info->skip_txfm;
2861         memcpy(best_blk_skip, txfm_info->blk_skip,
2862                sizeof(best_blk_skip[0]) * xd->height * xd->width);
2863         av1_copy_array(best_tx_type_map, xd->tx_type_map,
2864                        xd->height * xd->width);
2865         motion_mode_cand->rate_mv = rate_mv;
2866         motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
2867       }
2868 
2869       if (tmp_rd < ref_best_rd) {
2870         ref_best_rd = tmp_rd;
2871         best_ref_mv_idx = ref_mv_idx;
2872       }
2873     }
2874     restore_dst_buf(xd, orig_dst, num_planes);
2875   }
2876 
2877   if (best_rd == INT64_MAX) return INT64_MAX;
2878 
2879   // re-instate status of the best choice
2880   *rd_stats = best_rd_stats;
2881   *rd_stats_y = best_rd_stats_y;
2882   *rd_stats_uv = best_rd_stats_uv;
2883   *yrd = best_yrd;
2884   *mbmi = best_mbmi;
2885   txfm_info->skip_txfm = best_xskip_txfm;
2886   assert(IMPLIES(mbmi->comp_group_idx == 1,
2887                  mbmi->interinter_comp.type != COMPOUND_AVERAGE));
2888   memcpy(txfm_info->blk_skip, best_blk_skip,
2889          sizeof(best_blk_skip[0]) * xd->height * xd->width);
2890   av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
2891 
2892   rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
2893 
2894   return rd_stats->rdcost;
2895 }
2896 
2897 /*!\brief Search for the best intrabc predictor
2898  *
2899  * \ingroup intra_mode_search
2900  * \callergraph
2901  * This function performs a motion search to find the best intrabc predictor.
2902  *
2903  * \returns Returns the best overall rdcost (including the non-intrabc modes
2904  * search before this function).
2905  */
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
                                       int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  // IntraBC is only searched when the frame allows it and the encoder config
  // has it enabled.
  if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc)
    return INT64_MAX;
  const int num_planes = av1_num_planes(cm);

  MACROBLOCKD *const xd = &x->e_mbd;
  const TileInfo *tile = &xd->tile;
  MB_MODE_INFO *mbmi = xd->mi[0];
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  const int w = block_size_wide[bsize];
  const int h = block_size_high[bsize];
  const int sb_row = mi_row >> cm->seq_params.mib_size_log2;
  const int sb_col = mi_col >> cm->seq_params.mib_size_log2;

  // Derive the reference DV (displacement vector) predictor from the mv
  // reference stack of the current block.
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                   mbmi_ext->mode_context);
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
  int_mv nearestmv, nearmv;
  av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
                                   0);

  if (nearestmv.as_int == INVALID_MV) {
    nearestmv.as_int = 0;
  }
  if (nearmv.as_int == INVALID_MV) {
    nearmv.as_int = 0;
  }

  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
  if (dv_ref.as_int == 0) {
    // No usable neighbour DV; fall back to the default reference DV.
    av1_find_ref_dv(&dv_ref, tile, cm->seq_params.mib_size, mi_row);
  }
  // Ref DV should not have sub-pel.
  assert((dv_ref.as_mv.col & 7) == 0);
  assert((dv_ref.as_mv.row & 7) == 0);
  mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;

  // IntraBC predicts from the already-coded pixels of the current frame, so
  // point the prediction buffers at the current frame buffer.
  struct buf_2d yv12_mb[MAX_MB_PLANE];
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
  for (int i = 0; i < num_planes; ++i) {
    xd->plane[i].pre[0] = yv12_mb[i];
  }

  enum IntrabcMotionDirection {
    IBC_MOTION_ABOVE,
    IBC_MOTION_LEFT,
    IBC_MOTION_DIRECTIONS
  };

  MB_MODE_INFO best_mbmi = *mbmi;
  RD_STATS best_rdstats = *rd_stats;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);

  FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
  const search_site_config *lookahead_search_sites =
      cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
  av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
                                     &dv_ref.as_mv, lookahead_search_sites,
                                     /*fine_search_interval=*/0);
  fullms_params.is_intra_mode = 1;

  // Search twice: once restricted to the rows above the current superblock,
  // and once restricted to the already-coded area to its left.
  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
       dir < IBC_MOTION_DIRECTIONS; ++dir) {
    switch (dir) {
      case IBC_MOTION_ABOVE:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (tile->mi_col_end - mi_col) * MI_SIZE - w;
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        fullms_params.mv_limits.row_max =
            (sb_row * cm->seq_params.mib_size - mi_row) * MI_SIZE - h;
        break;
      case IBC_MOTION_LEFT:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (sb_col * cm->seq_params.mib_size - mi_col) * MI_SIZE - w;
        // TODO(aconverse@google.com): Minimize the overlap between above and
        // left areas.
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        int bottom_coded_mi_edge =
            AOMMIN((sb_row + 1) * cm->seq_params.mib_size, tile->mi_row_end);
        fullms_params.mv_limits.row_max =
            (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
        break;
      default: assert(0);
    }

    // av1_set_mv_search_range() may only tighten the limits. Snapshot the
    // coded-area constraints computed above so we can verify the clamped
    // range stays a subset of them. (The previous asserts compared each
    // limit against itself and were vacuously true.)
    const FullMvLimits tmp_mv_limits = fullms_params.mv_limits;
    av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);
    assert(fullms_params.mv_limits.col_min >= tmp_mv_limits.col_min);
    assert(fullms_params.mv_limits.col_max <= tmp_mv_limits.col_max);
    assert(fullms_params.mv_limits.row_min >= tmp_mv_limits.row_min);
    assert(fullms_params.mv_limits.row_max <= tmp_mv_limits.row_max);

    if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
        fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
      // Empty search range for this direction; nothing to do.
      continue;
    }

    const int step_param = cpi->mv_search_params.mv_step_param;
    const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
    IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
    int_mv best_mv, best_hash_mv;

    // Run a regular full-pel search and a hash-based search; keep whichever
    // yields the lower error.
    int bestsme = av1_full_pixel_search(start_mv, &fullms_params, step_param,
                                        NULL, &best_mv.as_fullmv, NULL);
    const int hashsme = av1_intrabc_hash_search(
        cpi, xd, &fullms_params, intrabc_hash_info, &best_hash_mv.as_fullmv);
    if (hashsme < bestsme) {
      best_mv = best_hash_mv;
      bestsme = hashsme;
    }

    if (bestsme == INT_MAX) continue;
    const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
    if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
                                get_fullmv_from_mv(&dv)))
      continue;
    if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
                         cm->seq_params.mib_size_log2))
      continue;

    // DV should not have sub-pel.
    assert((dv.col & 7) == 0);
    assert((dv.row & 7) == 0);
    // Commit the candidate DV to mbmi and build the intrabc predictor.
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
    mbmi->use_intrabc = 1;
    mbmi->mode = DC_PRED;
    mbmi->uv_mode = UV_DC_PRED;
    mbmi->motion_mode = SIMPLE_TRANSLATION;
    mbmi->mv[0].as_mv = dv;
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
    mbmi->skip_txfm = 0;
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                  av1_num_planes(cm) - 1);

    const IntraBCMVCosts *const dv_costs = &cpi->dv_costs;
    int *dvcost[2] = { (int *)&dv_costs->mv_component[0][MV_MAX],
                       (int *)&dv_costs->mv_component[1][MV_MAX] };
    // TODO(aconverse@google.com): The full motion field defining discount
    // in MV_COST_WEIGHT is too large. Explore other values.
    const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
                                        dvcost, MV_COST_WEIGHT_SUB);
    const int rate_mode = x->mode_costs.intrabc_cost[1];
    RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
                         &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
      continue;
    rd_stats_yuv.rdcost =
        RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
    if (rd_stats_yuv.rdcost < best_rd) {
      // New best intrabc candidate: record mode info and txfm side data.
      best_rd = rd_stats_yuv.rdcost;
      best_mbmi = *mbmi;
      best_rdstats = rd_stats_yuv;
      memcpy(best_blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
    }
  }
  // Re-instate the overall winner (which may be the pre-existing non-intrabc
  // mode if no candidate improved on best_rd).
  *mbmi = best_mbmi;
  *rd_stats = best_rdstats;
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
#if CONFIG_RD_DEBUG
  mbmi->rd_stats = *rd_stats;
#endif
  return best_rd;
}
3093 
// TODO(chiyotsai@google.com): We are using the struct tags (e.g. struct
// AV1_COMP, struct macroblock) instead of their
3095 // typedef here because Doxygen doesn't know about the typedefs yet. So using
3096 // the typedef will prevent doxygen from finding this function and generating
3097 // the callgraph. Once documents for AV1_COMP and MACROBLOCK are added to
3098 // doxygen, we can revert back to using the typedefs.
// Picks the best intra (and, when allowed, intrabc) mode for the current
// block. Searches luma intra modes first, then chroma if luma beat best_rd,
// then lets rd_pick_intrabc_mode_sb() try to improve on the result. On
// success the winning mode info and rd stats are written into rd_cost, the
// pick-mode context ctx, and xd->mi[0].
void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int num_planes = av1_num_planes(cm);
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0;
  int y_skip_txfm = 0, uv_skip_txfm = 0;
  int64_t dist_y = 0, dist_uv = 0;

  // Mark the block as intra (no references, zero mv, no skip-mode) before
  // the search.
  ctx->rd_stats.skip_txfm = 0;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->use_intrabc = 0;
  mbmi->mv[0].as_int = 0;
  mbmi->skip_mode = 0;

  // Luma intra mode search; returns the best luma-only rdcost.
  const int64_t intra_yrd =
      av1_rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y,
                                 &y_skip_txfm, bsize, best_rd, ctx);

  // Initialize default mode evaluation params
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);

  if (intra_yrd < best_rd) {
    // Search intra modes for uv planes if needed
    if (num_planes > 1) {
      // Set up the tx variables for reproducing the y predictions in case we
      // need it for chroma-from-luma.
      if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
        memcpy(txfm_info->blk_skip, ctx->blk_skip,
               sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
        av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
      }
      const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
      av1_rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly,
                                  &dist_uv, &uv_skip_txfm, bsize,
                                  max_uv_tx_size);
    }

    // Intra block is always coded as non-skip
    rd_cost->rate =
        rate_y + rate_uv +
        x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
    rd_cost->dist = dist_y + dist_uv;
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
    rd_cost->skip_txfm = 0;
  } else {
    // Luma search did not beat the incoming best; signal "no intra mode".
    rd_cost->rate = INT_MAX;
  }

  // Let intrabc try to improve on the best result so far. If it wins, its
  // txfm skip data must be propagated into the pick-mode context.
  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
    best_rd = rd_cost->rdcost;
  if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
    ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
    memcpy(ctx->blk_skip, txfm_info->blk_skip,
           sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
    assert(rd_cost->rate != INT_MAX);
  }
  if (rd_cost->rate == INT_MAX) return;

  // Store the winning mode info for the caller.
  ctx->mic = *xd->mi[0];
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
3167 
3168 static AOM_INLINE void calc_target_weighted_pred(
3169     const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
3170     const uint8_t *above, int above_stride, const uint8_t *left,
3171     int left_stride);
3172 
rd_pick_skip_mode(RD_STATS * rd_cost,InterModeSearchState * search_state,const AV1_COMP * const cpi,MACROBLOCK * const x,BLOCK_SIZE bsize,struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE])3173 static AOM_INLINE void rd_pick_skip_mode(
3174     RD_STATS *rd_cost, InterModeSearchState *search_state,
3175     const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
3176     struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
3177   const AV1_COMMON *const cm = &cpi->common;
3178   const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
3179   const int num_planes = av1_num_planes(cm);
3180   MACROBLOCKD *const xd = &x->e_mbd;
3181   MB_MODE_INFO *const mbmi = xd->mi[0];
3182   const TxfmSearchParams *txfm_params = &x->txfm_search_params;
3183 
3184   x->compound_idx = 1;  // COMPOUND_AVERAGE
3185   RD_STATS skip_mode_rd_stats;
3186   av1_invalid_rd_stats(&skip_mode_rd_stats);
3187 
3188   if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
3189       skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
3190     return;
3191   }
3192 
3193   const MV_REFERENCE_FRAME ref_frame =
3194       LAST_FRAME + skip_mode_info->ref_frame_idx_0;
3195   const MV_REFERENCE_FRAME second_ref_frame =
3196       LAST_FRAME + skip_mode_info->ref_frame_idx_1;
3197   const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
3198   const THR_MODES mode_index =
3199       get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
3200 
3201   if (mode_index == THR_INVALID) {
3202     return;
3203   }
3204 
3205   if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
3206        cpi->sf.inter_sf.disable_onesided_comp) &&
3207       cpi->all_one_sided_refs) {
3208     return;
3209   }
3210 
3211   mbmi->mode = this_mode;
3212   mbmi->uv_mode = UV_DC_PRED;
3213   mbmi->ref_frame[0] = ref_frame;
3214   mbmi->ref_frame[1] = second_ref_frame;
3215   const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3216   if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
3217     MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
3218     if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
3219         mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
3220       return;
3221     }
3222     av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
3223                      xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3224                      mbmi_ext->mode_context);
3225     // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3226     // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3227     av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
3228   }
3229 
3230   assert(this_mode == NEAREST_NEARESTMV);
3231   if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
3232     return;
3233   }
3234 
3235   mbmi->filter_intra_mode_info.use_filter_intra = 0;
3236   mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
3237   mbmi->comp_group_idx = 0;
3238   mbmi->compound_idx = x->compound_idx;
3239   mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3240   mbmi->motion_mode = SIMPLE_TRANSLATION;
3241   mbmi->ref_mv_idx = 0;
3242   mbmi->skip_mode = mbmi->skip_txfm = 1;
3243 
3244   set_default_interp_filters(mbmi, cm->features.interp_filter);
3245 
3246   set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3247   for (int i = 0; i < num_planes; i++) {
3248     xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3249     xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3250   }
3251 
3252   BUFFER_SET orig_dst;
3253   for (int i = 0; i < num_planes; i++) {
3254     orig_dst.plane[i] = xd->plane[i].dst.buf;
3255     orig_dst.stride[i] = xd->plane[i].dst.stride;
3256   }
3257 
3258   // Obtain the rdcost for skip_mode.
3259   skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst);
3260 
3261   // Compare the use of skip_mode with the best intra/inter mode obtained.
3262   const int skip_mode_ctx = av1_get_skip_mode_context(xd);
3263   int64_t best_intra_inter_mode_cost = INT64_MAX;
3264   if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
3265     const ModeCosts *mode_costs = &x->mode_costs;
3266     best_intra_inter_mode_cost = RDCOST(
3267         x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
3268         rd_cost->dist);
3269     // Account for non-skip mode rate in total rd stats
3270     rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
3271     av1_rd_cost_update(x->rdmult, rd_cost);
3272   }
3273 
3274   if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
3275       (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
3276     assert(mode_index != THR_INVALID);
3277     search_state->best_mbmode.skip_mode = 1;
3278     search_state->best_mbmode = *mbmi;
3279 
3280     search_state->best_mbmode.skip_mode = search_state->best_mbmode.skip_txfm =
3281         1;
3282     search_state->best_mbmode.mode = NEAREST_NEARESTMV;
3283     search_state->best_mbmode.ref_frame[0] = mbmi->ref_frame[0];
3284     search_state->best_mbmode.ref_frame[1] = mbmi->ref_frame[1];
3285     search_state->best_mbmode.mv[0].as_int = mbmi->mv[0].as_int;
3286     search_state->best_mbmode.mv[1].as_int = mbmi->mv[1].as_int;
3287     search_state->best_mbmode.ref_mv_idx = 0;
3288 
3289     // Set up tx_size related variables for skip-specific loop filtering.
3290     search_state->best_mbmode.tx_size =
3291         block_signals_txsize(bsize)
3292             ? tx_size_from_tx_mode(bsize, txfm_params->tx_mode_search_type)
3293             : max_txsize_rect_lookup[bsize];
3294     memset(search_state->best_mbmode.inter_tx_size,
3295            search_state->best_mbmode.tx_size,
3296            sizeof(search_state->best_mbmode.inter_tx_size));
3297     set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
3298                   search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
3299                   xd);
3300 
3301     // Set up color-related variables for skip mode.
3302     search_state->best_mbmode.uv_mode = UV_DC_PRED;
3303     search_state->best_mbmode.palette_mode_info.palette_size[0] = 0;
3304     search_state->best_mbmode.palette_mode_info.palette_size[1] = 0;
3305 
3306     search_state->best_mbmode.comp_group_idx = 0;
3307     search_state->best_mbmode.compound_idx = x->compound_idx;
3308     search_state->best_mbmode.interinter_comp.type = COMPOUND_AVERAGE;
3309     search_state->best_mbmode.motion_mode = SIMPLE_TRANSLATION;
3310 
3311     search_state->best_mbmode.interintra_mode =
3312         (INTERINTRA_MODE)(II_DC_PRED - 1);
3313     search_state->best_mbmode.filter_intra_mode_info.use_filter_intra = 0;
3314 
3315     set_default_interp_filters(&search_state->best_mbmode,
3316                                cm->features.interp_filter);
3317 
3318     search_state->best_mode_index = mode_index;
3319 
3320     // Update rd_cost
3321     rd_cost->rate = skip_mode_rd_stats.rate;
3322     rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
3323     rd_cost->rdcost = skip_mode_rd_stats.rdcost;
3324 
3325     search_state->best_rd = rd_cost->rdcost;
3326     search_state->best_skip2 = 1;
3327     search_state->best_mode_skippable = 1;
3328 
3329     x->txfm_search_info.skip_txfm = 1;
3330   }
3331 }
3332 
3333 // Get winner mode stats of given mode index
get_winner_mode_stats(MACROBLOCK * x,MB_MODE_INFO * best_mbmode,RD_STATS * best_rd_cost,int best_rate_y,int best_rate_uv,THR_MODES * best_mode_index,RD_STATS ** winner_rd_cost,int * winner_rate_y,int * winner_rate_uv,THR_MODES * winner_mode_index,MULTI_WINNER_MODE_TYPE multi_winner_mode_type,int mode_idx)3334 static AOM_INLINE MB_MODE_INFO *get_winner_mode_stats(
3335     MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
3336     int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
3337     RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
3338     THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
3339     int mode_idx) {
3340   MB_MODE_INFO *winner_mbmi;
3341   if (multi_winner_mode_type) {
3342     assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
3343     WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
3344     winner_mbmi = &winner_mode_stat->mbmi;
3345 
3346     *winner_rd_cost = &winner_mode_stat->rd_cost;
3347     *winner_rate_y = winner_mode_stat->rate_y;
3348     *winner_rate_uv = winner_mode_stat->rate_uv;
3349     *winner_mode_index = winner_mode_stat->mode_index;
3350   } else {
3351     winner_mbmi = best_mbmode;
3352     *winner_rd_cost = best_rd_cost;
3353     *winner_rate_y = best_rate_y;
3354     *winner_rate_uv = best_rate_uv;
3355     *winner_mode_index = *best_mode_index;
3356   }
3357   return winner_mbmi;
3358 }
3359 
3360 // speed feature: fast intra/inter transform type search
3361 // Used for speed >= 2
3362 // When this speed feature is on, in rd mode search, only DCT is used.
3363 // After the mode is determined, this function is called, to select
3364 // transform types and get accurate rdcost.
// Re-evaluates the transform search for the winner mode(s) found with a
// restricted (DCT-only) transform search, to obtain an accurate rdcost.
// If the refined rdcost beats the current best, the best mode info,
// rd_cost and context buffers are updated in place.
static AOM_INLINE void refine_winner_mode_tx(
    const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
    PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
    MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int64_t best_rd;
  const int num_planes = av1_num_planes(cm);

  if (!is_winner_mode_processing_enabled(cpi, best_mbmode, best_mbmode->mode))
    return;

  // Set params for winner mode evaluation
  set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);

  // No best mode identified so far
  if (*best_mode_index == THR_INVALID) return;

  // Baseline rd the refined transform search has to beat.
  best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
  for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
    RD_STATS *winner_rd_stats = NULL;
    int winner_rate_y = 0, winner_rate_uv = 0;
    THR_MODES winner_mode_index = 0;

    // TODO(any): Combine best mode and multi-winner mode processing paths
    // Get winner mode stats for current mode index
    MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
        x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
        &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);

    if (xd->lossless[winner_mbmi->segment_id] == 0 &&
        winner_mode_index != THR_INVALID &&
        is_winner_mode_processing_enabled(cpi, winner_mbmi,
                                          winner_mbmi->mode)) {
      RD_STATS rd_stats = *winner_rd_stats;
      int skip_blk = 0;
      RD_STATS rd_stats_y, rd_stats_uv;
      const int skip_ctx = av1_get_skip_txfm_context(xd);

      // Make the winner candidate the current mode for the searches below.
      *mbmi = *winner_mbmi;

      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

      // Select prediction reference frames.
      for (int i = 0; i < num_planes; i++) {
        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
        if (has_second_ref(mbmi))
          xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
      }

      if (is_inter_mode(mbmi->mode)) {
        // Rebuild the inter prediction (and OBMC blend if applicable)
        // before redoing the transform search on the residue.
        const int mi_row = xd->mi_row;
        const int mi_col = xd->mi_col;
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                      av1_num_planes(cm) - 1);
        if (mbmi->motion_mode == OBMC_CAUSAL)
          av1_build_obmc_inter_predictors_sb(cm, xd);

        av1_subtract_plane(x, bsize, 0);
        if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
            !xd->lossless[mbmi->segment_id]) {
          av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                              INT64_MAX);
          assert(rd_stats_y.rate != INT_MAX);
        } else {
          av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                            INT64_MAX);
          // Uniform tx size: replicate the chosen size/skip over the block.
          memset(mbmi->inter_tx_size, mbmi->tx_size,
                 sizeof(mbmi->inter_tx_size));
          for (int i = 0; i < xd->height * xd->width; ++i)
            set_blk_skip(txfm_info->blk_skip, 0, i, rd_stats_y.skip_txfm);
        }
      } else {
        av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                          INT64_MAX);
      }

      if (num_planes > 1) {
        av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
      } else {
        av1_init_rd_stats(&rd_stats_uv);
      }

      // Decide between coding the residue and skipping the transform
      // entirely, whichever yields the lower rd cost (inter only).
      const ModeCosts *mode_costs = &x->mode_costs;
      if (is_inter_mode(mbmi->mode) &&
          RDCOST(x->rdmult,
                 mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
                     rd_stats_uv.rate,
                 (rd_stats_y.dist + rd_stats_uv.dist)) >
              RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
                     (rd_stats_y.sse + rd_stats_uv.sse))) {
        skip_blk = 1;
        rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
        rd_stats_uv.rate = 0;
        rd_stats_y.dist = rd_stats_y.sse;
        rd_stats_uv.dist = rd_stats_uv.sse;
      } else {
        skip_blk = 0;
        rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
      }
      // Replace the stale luma/chroma rates in the stored total rate with
      // the refined ones, then compare against the current best rd.
      int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
                      winner_rate_y - winner_rate_uv;
      int64_t this_rd =
          RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
      if (best_rd > this_rd) {
        // Refinement improved rd: adopt this candidate as the new best.
        *best_mbmode = *mbmi;
        *best_mode_index = winner_mode_index;
        av1_copy_array(ctx->blk_skip, txfm_info->blk_skip, ctx->num_4x4_blk);
        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
        rd_cost->rate = this_rate;
        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
        rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
        rd_cost->rdcost = this_rd;
        best_rd = this_rd;
        *best_skip2 = skip_blk;
      }
    }
  }
}
3489 
3490 /*!\cond */
3491 typedef struct {
3492   // Mask for each reference frame, specifying which prediction modes to NOT try
3493   // during search.
3494   uint32_t pred_modes[REF_FRAMES];
3495   // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
3496   // reference frames (i, j).
3497   // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
3498   // (NONE_FRAME).
3499   bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
3500 } mode_skip_mask_t;
3501 /*!\endcond */
3502 
3503 // Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
disable_reference(MV_REFERENCE_FRAME ref,bool ref_combo[REF_FRAMES][REF_FRAMES+1])3504 static AOM_INLINE void disable_reference(
3505     MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3506   for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3507     ref_combo[ref][ref2 + 1] = true;
3508   }
3509 }
3510 
3511 // Update 'ref_combo' mask to disable all inter references except ALTREF.
disable_inter_references_except_altref(bool ref_combo[REF_FRAMES][REF_FRAMES+1])3512 static AOM_INLINE void disable_inter_references_except_altref(
3513     bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3514   disable_reference(LAST_FRAME, ref_combo);
3515   disable_reference(LAST2_FRAME, ref_combo);
3516   disable_reference(LAST3_FRAME, ref_combo);
3517   disable_reference(GOLDEN_FRAME, ref_combo);
3518   disable_reference(BWDREF_FRAME, ref_combo);
3519   disable_reference(ALTREF2_FRAME, ref_combo);
3520 }
3521 
// Reference-frame pairs searched when the reduced reference set is enabled.
// The 2nd entry is NONE_FRAME for single-reference combinations.
static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
  { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
  { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
  { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
  { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
  { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
  { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
  { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
  { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
};

// Minimal, single-reference-only combinations used for real-time encoding.
static const MV_REFERENCE_FRAME real_time_ref_combos[][2] = {
  { LAST_FRAME, NONE_FRAME },
  { ALTREF_FRAME, NONE_FRAME },
  { GOLDEN_FRAME, NONE_FRAME },
  { INTRA_FRAME, NONE_FRAME }
};

// Which subset of reference combinations the mode search may consider.
typedef enum { REF_SET_FULL, REF_SET_REDUCED, REF_SET_REALTIME } REF_SET;
3541 
default_skip_mask(mode_skip_mask_t * mask,REF_SET ref_set)3542 static AOM_INLINE void default_skip_mask(mode_skip_mask_t *mask,
3543                                          REF_SET ref_set) {
3544   if (ref_set == REF_SET_FULL) {
3545     // Everything available by default.
3546     memset(mask, 0, sizeof(*mask));
3547   } else {
3548     // All modes available by default.
3549     memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
3550     // All references disabled first.
3551     for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
3552       for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3553         mask->ref_combo[ref1][ref2 + 1] = true;
3554       }
3555     }
3556     const MV_REFERENCE_FRAME(*ref_set_combos)[2];
3557     int num_ref_combos;
3558 
3559     // Then enable reduced set of references explicitly.
3560     switch (ref_set) {
3561       case REF_SET_REDUCED:
3562         ref_set_combos = reduced_ref_combos;
3563         num_ref_combos =
3564             (int)sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]);
3565         break;
3566       case REF_SET_REALTIME:
3567         ref_set_combos = real_time_ref_combos;
3568         num_ref_combos =
3569             (int)sizeof(real_time_ref_combos) / sizeof(real_time_ref_combos[0]);
3570         break;
3571       default: assert(0); num_ref_combos = 0;
3572     }
3573 
3574     for (int i = 0; i < num_ref_combos; ++i) {
3575       const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
3576       mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
3577     }
3578   }
3579 }
3580 
// Build the mode/reference skip mask for the current block from the active
// reference set, segment features, reference availability and prediction MV
// SAD statistics gathered so far.
static AOM_INLINE void init_mode_skip_mask(mode_skip_mask_t *mask,
                                           const AV1_COMP *cpi, MACROBLOCK *x,
                                           BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const SPEED_FEATURES *const sf = &cpi->sf;
  REF_SET ref_set = REF_SET_FULL;

  // Select which reference set applies (realtime takes precedence).
  if (sf->rt_sf.use_real_time_ref_set)
    ref_set = REF_SET_REALTIME;
  else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
    ref_set = REF_SET_REDUCED;

  default_skip_mask(mask, ref_set);

  // Minimum prediction MV SAD across the references under consideration;
  // used below to prune fixed-MV modes for comparatively poor references.
  int min_pred_mv_sad = INT_MAX;
  MV_REFERENCE_FRAME ref_frame;
  if (ref_set == REF_SET_REALTIME) {
    // For real-time encoding, we only look at a subset of ref frames. So the
    // threshold for pruning should be computed from this subset as well.
    const int num_rt_refs =
        sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
    for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
      if (ref != INTRA_FRAME) {
        min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
      }
    }
  } else {
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
      min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
  }

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
      // Skip checking missing reference in both single and compound reference
      // modes.
      disable_reference(ref_frame, mask->ref_combo);
    } else {
      // Skip fixed mv modes for poor references
      if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
        mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
      }
    }
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
      // Reference not used for the segment.
      disable_reference(ref_frame, mask->ref_combo);
    }
  }
  // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
  // is disabled for this segment. This is to prevent the possibility that we
  // end up unable to pick any mode.
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
    // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
    // unless ARNR filtering is enabled in which case we want
    // an unfiltered alternative. We allow near/nearest as well
    // because they may result in zero-zero MVs but be cheaper.
    if (cpi->rc.is_src_frame_alt_ref &&
        (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
      disable_inter_references_except_altref(mask->ref_combo);

      mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
      const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
      int_mv near_mv, nearest_mv, global_mv;
      get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
                  &x->mbmi_ext);
      get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
      get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);

      // Also mask out NEAR/NEAREST when they differ from the global MV,
      // since only zero-zero motion is useful here.
      if (near_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
      if (nearest_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
    }
  }

  if (cpi->rc.is_src_frame_alt_ref) {
    if (sf->inter_sf.alt_ref_search_fp &&
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
      // Fast path: search ALTREF only.
      mask->pred_modes[ALTREF_FRAME] = 0;
      disable_inter_references_except_altref(mask->ref_combo);
      disable_reference(INTRA_FRAME, mask->ref_combo);
    }
  }

  if (sf->inter_sf.alt_ref_search_fp) {
    if (!cm->show_frame && x->best_pred_mv_sad < INT_MAX) {
      // Threshold is best SAD plus 12.5% slack.
      int sad_thresh = x->best_pred_mv_sad + (x->best_pred_mv_sad >> 3);
      // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
      // those are past frames
      for (ref_frame = BWDREF_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
        if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
            0)
          if (x->pred_mv_sad[ref_frame] > sad_thresh)
            mask->pred_modes[ref_frame] |= INTER_ALL;
      }
    }
  }

  if (bsize > sf->part_sf.max_intra_bsize) {
    disable_reference(INTRA_FRAME, mask->ref_combo);
  }

  // Apply the speed-feature intra luma mode mask for this transform size.
  mask->pred_modes[INTRA_FRAME] |=
      ~(sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]]);
}
3691 
init_neighbor_pred_buf(const OBMCBuffer * const obmc_buffer,HandleInterModeArgs * const args,int is_hbd)3692 static AOM_INLINE void init_neighbor_pred_buf(
3693     const OBMCBuffer *const obmc_buffer, HandleInterModeArgs *const args,
3694     int is_hbd) {
3695   if (is_hbd) {
3696     const int len = sizeof(uint16_t);
3697     args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
3698     args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
3699                                                  (MAX_SB_SQUARE >> 1) * len);
3700     args->above_pred_buf[2] =
3701         CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
3702     args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
3703     args->left_pred_buf[1] =
3704         CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
3705     args->left_pred_buf[2] =
3706         CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
3707   } else {
3708     args->above_pred_buf[0] = obmc_buffer->above_pred;
3709     args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
3710     args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
3711     args->left_pred_buf[0] = obmc_buffer->left_pred;
3712     args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
3713     args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
3714   }
3715 }
3716 
prune_ref_frame(const AV1_COMP * cpi,const MACROBLOCK * x,MV_REFERENCE_FRAME ref_frame)3717 static AOM_INLINE int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
3718                                       MV_REFERENCE_FRAME ref_frame) {
3719   const AV1_COMMON *const cm = &cpi->common;
3720   MV_REFERENCE_FRAME rf[2];
3721   av1_set_ref_frame(rf, ref_frame);
3722 
3723   if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;
3724 
3725   if (prune_ref_by_selective_ref_frame(cpi, x, rf,
3726                                        cm->cur_frame->ref_display_order_hint)) {
3727     return 1;
3728   }
3729 
3730   return 0;
3731 }
3732 
is_ref_frame_used_by_compound_ref(int ref_frame,int skip_ref_frame_mask)3733 static AOM_INLINE int is_ref_frame_used_by_compound_ref(
3734     int ref_frame, int skip_ref_frame_mask) {
3735   for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
3736     if (!(skip_ref_frame_mask & (1 << r))) {
3737       const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
3738       if (rf[0] == ref_frame || rf[1] == ref_frame) {
3739         return 1;
3740       }
3741     }
3742   }
3743   return 0;
3744 }
3745 
is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,const MB_MODE_INFO * mi_cache)3746 static AOM_INLINE int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
3747                                                  const MB_MODE_INFO *mi_cache) {
3748   if (!mi_cache) {
3749     return 0;
3750   }
3751 
3752   if (ref_frame < REF_FRAMES) {
3753     return (ref_frame == mi_cache->ref_frame[0] ||
3754             ref_frame == mi_cache->ref_frame[1]);
3755   }
3756 
3757   // if we are here, then the current mode is compound.
3758   MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame);
3759   return ref_frame == cached_ref_type;
3760 }
3761 
3762 // Please add/modify parameter setting in this function, making it consistent
3763 // and easy to read and maintain.
// Prepare all per-block state needed before the inter mode search: OBMC
// scratch buffers, reference costs, reference MV lists for single and
// compound references, OBMC neighbor predictions, and the mode skip mask.
static AOM_INLINE void set_params_rd_pick_inter_mode(
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
    unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
    struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  unsigned char segment_id = mbmi->segment_id;

  init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
  av1_collect_neighbors_ref_counts(xd);
  estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  x->best_pred_mv_sad = INT_MAX;

  // Set up buffers and ref MV lists for each available single reference.
  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
       ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    mbmi_ext->mode_context[ref_frame] = 0;
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
      // Skip the ref frame if the mask says skip and the ref is not used by
      // compound ref.
      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
          !is_ref_frame_used_in_cache(ref_frame, x->intermode_cache)) {
        continue;
      }
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
    }
    // Store the best pred_mv_sad across all past frames
    if (cpi->sf.inter_sf.alt_ref_search_fp &&
        cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] < 0)
      x->best_pred_mv_sad =
          AOMMIN(x->best_pred_mv_sad, x->pred_mv_sad[ref_frame]);
  }

  // Populate ref MV lists for compound references (skipped entirely for the
  // real-time reference set, which uses single references only).
  if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
    // No second reference on RT ref set, so no need to initialize
    for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
         ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
      mbmi_ext->mode_context[ref_frame] = 0;
      mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
      const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
      if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
            (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
        continue;
      }

      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_in_cache(ref_frame, x->intermode_cache)) {
        continue;
      }
      // Ref mv list population is not required, when compound references are
      // pruned.
      if (prune_ref_frame(cpi, x, ref_frame)) continue;

      av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                       xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                       mbmi_ext->mode_context);
      // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
      // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
      av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
    }
  }

  // Build the above/left neighbor predictions needed by OBMC, unless OBMC is
  // disabled or pruned by frame-level probabilities.
  av1_count_overlappable_neighbors(cm, xd);
  const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
  const int prune_obmc = cpi->frame_probs.obmc_probs[update_type][bsize] <
                         cpi->sf.inter_sf.prune_obmc_prob_thresh;
  if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
    if (check_num_overlappable_neighbors(mbmi) &&
        is_motion_variation_allowed_bsize(bsize)) {
      int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                       MAX_SB_SIZE >> 1 };
      int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                        MAX_SB_SIZE >> 1 };
      int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
                                          dst_width1, dst_height1,
                                          args->above_pred_stride);
      av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
                                         dst_width2, dst_height2,
                                         args->left_pred_stride);
      const int num_planes = av1_num_planes(cm);
      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
                           mi_col, 0, num_planes);
      calc_target_weighted_pred(
          cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
          args->left_pred_buf[0], args->left_pred_stride[0]);
    }
  }

  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);

  // Set params for mode evaluation
  set_mode_eval_params(cpi, x, MODE_EVAL);

  x->comp_rd_stats_idx = 0;
}
3871 
init_inter_mode_search_state(InterModeSearchState * search_state,const AV1_COMP * cpi,const MACROBLOCK * x,BLOCK_SIZE bsize,int64_t best_rd_so_far)3872 static AOM_INLINE void init_inter_mode_search_state(
3873     InterModeSearchState *search_state, const AV1_COMP *cpi,
3874     const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
3875   init_intra_mode_search_state(&search_state->intra_search_state);
3876   av1_invalid_rd_stats(&search_state->best_y_rdcost);
3877 
3878   search_state->best_rd = best_rd_so_far;
3879   search_state->best_skip_rd[0] = INT64_MAX;
3880   search_state->best_skip_rd[1] = INT64_MAX;
3881 
3882   av1_zero(search_state->best_mbmode);
3883 
3884   search_state->best_rate_y = INT_MAX;
3885 
3886   search_state->best_rate_uv = INT_MAX;
3887 
3888   search_state->best_mode_skippable = 0;
3889 
3890   search_state->best_skip2 = 0;
3891 
3892   search_state->best_mode_index = THR_INVALID;
3893 
3894   const MACROBLOCKD *const xd = &x->e_mbd;
3895   const MB_MODE_INFO *const mbmi = xd->mi[0];
3896   const unsigned char segment_id = mbmi->segment_id;
3897 
3898   search_state->num_available_refs = 0;
3899   memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
3900   memset(search_state->dist_order_refs, -1,
3901          sizeof(search_state->dist_order_refs));
3902 
3903   for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
3904     search_state->mode_threshold[i] = 0;
3905   const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
3906   for (int i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
3907     search_state->mode_threshold[i] =
3908         ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
3909         RD_THRESH_FAC_FRAC_BITS;
3910 
3911   search_state->best_intra_rd = INT64_MAX;
3912 
3913   search_state->best_pred_sse = UINT_MAX;
3914 
3915   av1_zero(search_state->single_newmv);
3916   av1_zero(search_state->single_newmv_rate);
3917   av1_zero(search_state->single_newmv_valid);
3918   for (int i = 0; i < MB_MODE_COUNT; ++i) {
3919     for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
3920       for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
3921         search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
3922         search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
3923       }
3924     }
3925   }
3926 
3927   for (int dir = 0; dir < 2; ++dir) {
3928     for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
3929       for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
3930         SingleInterModeState *state;
3931 
3932         state = &search_state->single_state[dir][mode][ref_frame];
3933         state->ref_frame = NONE_FRAME;
3934         state->rd = INT64_MAX;
3935 
3936         state = &search_state->single_state_modelled[dir][mode][ref_frame];
3937         state->ref_frame = NONE_FRAME;
3938         state->rd = INT64_MAX;
3939       }
3940     }
3941   }
3942   for (int dir = 0; dir < 2; ++dir) {
3943     for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
3944       for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
3945         search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
3946       }
3947     }
3948   }
3949   for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
3950     search_state->best_single_rd[ref_frame] = INT64_MAX;
3951     search_state->best_single_mode[ref_frame] = MB_MODE_COUNT;
3952   }
3953   av1_zero(search_state->single_state_cnt);
3954   av1_zero(search_state->single_state_modelled_cnt);
3955 
3956   for (int i = 0; i < REFERENCE_MODES; ++i) {
3957     search_state->best_pred_rd[i] = INT64_MAX;
3958   }
3959 }
3960 
mask_says_skip(const mode_skip_mask_t * mode_skip_mask,const MV_REFERENCE_FRAME * ref_frame,const PREDICTION_MODE this_mode)3961 static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
3962                            const MV_REFERENCE_FRAME *ref_frame,
3963                            const PREDICTION_MODE this_mode) {
3964   if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
3965     return true;
3966   }
3967 
3968   return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
3969 }
3970 
// Decide whether a mode/reference combination is structurally incompatible
// with the current block (size, frame type, segment settings) and can be
// skipped outright. Returns 1 to skip, 0 to keep evaluating.
static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
                                      BLOCK_SIZE bsize,
                                      PREDICTION_MODE curr_mode,
                                      const MV_REFERENCE_FRAME *ref_frames) {
  if (ref_frames[1] > INTRA_FRAME) {
    // Compound prediction: verify it is permitted in this context.
    const AV1_COMMON *const cm = &cpi->common;
    if (!is_comp_ref_allowed(bsize) ||
        !(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]]) ||
        frame_is_intra_only(cm) ||
        cm->current_frame.reference_mode == SINGLE_REFERENCE) {
      return 1;
    }

    // Do not allow compound prediction if the segment level reference frame
    // feature is in use as in this case there can only be one reference.
    const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
    if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
  }

  // Inter-intra prediction: both the block size and the inter mode must
  // support it.
  if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME &&
      (!is_interintra_allowed_bsize(bsize) ||
       !is_interintra_allowed_mode(curr_mode))) {
    return 1;
  }

  return 0;
}
4003 
// Collect the union of reference-frame masks recorded for every mi unit
// covered by the current block inside its superblock.
static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
                                        BLOCK_SIZE bsize, int mib_size) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_size_mask = mib_size - 1;
  // Block position inside its superblock, in mi units.
  const int row_start = xd->mi_row & sb_size_mask;
  const int col_start = xd->mi_col & sb_size_mask;
  const int row_end = row_start + mi_size_high[bsize];
  const int col_end = col_start + mi_size_wide[bsize];
  int mask = 0;
  for (int r = row_start; r < row_end; ++r) {
    for (int c = col_start; c < col_end; ++c) {
      // picked_ref_frames_mask is laid out as a 32x32 grid of mi units.
      mask |= x->picked_ref_frames_mask[r * 32 + c];
    }
  }
  return mask;
}
4022 
4023 // Check if reference frame pair of the current block matches with the given
4024 // block.
match_ref_frame_pair(const MB_MODE_INFO * mbmi,const MV_REFERENCE_FRAME * ref_frames)4025 static INLINE int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
4026                                        const MV_REFERENCE_FRAME *ref_frames) {
4027   return ((ref_frames[0] == mbmi->ref_frame[0]) &&
4028           (ref_frames[1] == mbmi->ref_frame[1]));
4029 }
4030 
// Decides whether the (mode, ref_frame) candidate can be pruned before any
// rate-distortion work is done. The checks here do not depend on the order
// in which modes are searched.
// Case 1: return 0, means don't skip this mode
// Case 2: return 1, means skip this mode completely
// Case 3: return 2, means skip compound only, but still try single motion modes
static int inter_mode_search_order_independent_skip(
    const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
    InterModeSearchState *search_state, int skip_ref_frame_mask,
    PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
  // Honor the skip masks precomputed before the mode loop.
  if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
    return 1;
  }

  const int ref_type = av1_ref_frame_type(ref_frame);
  if (prune_ref_frame(cpi, x, ref_type)) return 1;

  // This is only used in motion vector unit test.
  if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
      ref_frame[0] == INTRA_FRAME)
    return 1;

  const AV1_COMMON *const cm = &cpi->common;
  // Skip modes whose motion vectors duplicate an already-searched candidate.
  if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
    return 1;
  }

  // Reuse the prediction mode in cache
  if (x->use_intermode_cache) {
    const MB_MODE_INFO *cached_mi = x->intermode_cache;
    const PREDICTION_MODE cached_mode = cached_mi->mode;
    const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
    const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;

    // If the cached mode is intra, then we just need to match the mode.
    if (is_mode_intra(cached_mode) && mode != cached_mode) {
      return 1;
    }

    // If the cached mode is single inter mode, then we match the mode and
    // reference frame.
    if (cached_mode_is_single) {
      if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
        return 1;
      }
    } else {
      // If the cached mode is compound, then we need to consider several cases.
      const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
      if (mode_is_single) {
        // If the mode is single, we know the modes can't match. But we might
        // still want to search it if compound mode depends on the current mode.
        int skip_motion_mode_only = 0;
        if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
          // Cached compound mode's NEWMV component uses its first reference.
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
        } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
          // Cached compound mode's NEWMV component uses its second reference.
          skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
        } else if (cached_mode == NEW_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
                                   ref_frame[0] == cached_frame[1]);
        }

        // Yields 1 (skip entirely) or 2 (search, but skip motion modes).
        return 1 + skip_motion_mode_only;
      } else {
        // If both modes are compound, then everything must match.
        if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
            ref_frame[1] != cached_frame[1]) {
          return 1;
        }
      }
    }
  }

  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
  // If no valid mode has been found so far in PARTITION_NONE when finding a
  // valid partition is required, do not skip mode.
  if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
      x->must_find_valid_partition)
    return 0;

  const SPEED_FEATURES *const sf = &cpi->sf;
  // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
  // frames
  if (sf->inter_sf.prune_nearmv_using_neighbors &&
      (mode == NEAR_NEARMV || mode == NEARMV)) {
    const MACROBLOCKD *const xd = &x->e_mbd;
    if (search_state->best_rd != INT64_MAX && xd->left_available &&
        xd->up_available) {
      // Threshold steps down as qindex grows: 2, then 1, then 0 (no pruning).
      const int num_ref_frame_pair_match_thresh =
          2 - (x->qindex * 3 / QINDEX_RANGE);
      assert(num_ref_frame_pair_match_thresh <= 2 &&
             num_ref_frame_pair_match_thresh >= 0);
      int num_ref_frame_pair_match = 0;

      num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
      num_ref_frame_pair_match +=
          match_ref_frame_pair(xd->above_mbmi, ref_frame);

      // Prune modes if:
      // num_ref_frame_pair_match < 2 for qindex   0 to 85
      // num_ref_frame_pair_match < 1 for qindex  86 to 170
      // No pruning for qindex 171 to 255
      if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
    }
  }

  int skip_motion_mode = 0;
  if (mbmi->partition != PARTITION_NONE) {
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
    if (ref_type <= ALTREF_FRAME && skip_ref) {
      // Since the compound ref modes depends on the motion estimation result of
      // two single ref modes (best mv of single ref modes as the start point),
      // if current single ref mode is marked skip, we need to check if it will
      // be used in compound ref modes.
      if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
        // Found a not skipped compound ref mode which contains current
        // single ref. So this single ref can't be skipped completely
        // Just skip its motion mode search, still try its simple
        // transition mode.
        skip_motion_mode = 1;
        skip_ref = 0;
      }
    }
    // If we are reusing the prediction from cache, and the current frame is
    // required by the cache, then we cannot prune it.
    if (is_ref_frame_used_in_cache(ref_type, x->intermode_cache)) {
      skip_ref = 0;
      // If the cache only needs the current reference type for compound
      // prediction, then we can skip motion mode search.
      skip_motion_mode = (ref_type <= ALTREF_FRAME &&
                          x->intermode_cache->ref_frame[1] > INTRA_FRAME);
    }
    if (skip_ref) return 1;
  }

  if (ref_frame[0] == INTRA_FRAME) {
    if (mode != DC_PRED) {
      // Disable intra modes other than DC_PRED for blocks with low variance
      // Threshold for intra skipping based on source variance
      // TODO(debargha): Specialize the threshold for super block sizes
      const unsigned int skip_intra_var_thresh = 64;
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
          x->source_variance < skip_intra_var_thresh)
        return 1;
    }
  }

  if (skip_motion_mode) return 2;

  return 0;
}
4178 
init_mbmi(MB_MODE_INFO * mbmi,PREDICTION_MODE curr_mode,const MV_REFERENCE_FRAME * ref_frames,const AV1_COMMON * cm)4179 static INLINE void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
4180                              const MV_REFERENCE_FRAME *ref_frames,
4181                              const AV1_COMMON *cm) {
4182   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4183   mbmi->ref_mv_idx = 0;
4184   mbmi->mode = curr_mode;
4185   mbmi->uv_mode = UV_DC_PRED;
4186   mbmi->ref_frame[0] = ref_frames[0];
4187   mbmi->ref_frame[1] = ref_frames[1];
4188   pmi->palette_size[0] = 0;
4189   pmi->palette_size[1] = 0;
4190   mbmi->filter_intra_mode_info.use_filter_intra = 0;
4191   mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4192   mbmi->motion_mode = SIMPLE_TRANSLATION;
4193   mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
4194   set_default_interp_filters(mbmi, cm->features.interp_filter);
4195 }
4196 
collect_single_states(MACROBLOCK * x,InterModeSearchState * search_state,const MB_MODE_INFO * const mbmi)4197 static AOM_INLINE void collect_single_states(MACROBLOCK *x,
4198                                              InterModeSearchState *search_state,
4199                                              const MB_MODE_INFO *const mbmi) {
4200   int i, j;
4201   const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
4202   const PREDICTION_MODE this_mode = mbmi->mode;
4203   const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
4204   const int mode_offset = INTER_OFFSET(this_mode);
4205   const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
4206 
4207   // Simple rd
4208   int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
4209   for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4210     const int64_t rd =
4211         search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
4212     if (rd < simple_rd) simple_rd = rd;
4213   }
4214 
4215   // Insertion sort of single_state
4216   const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
4217   SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
4218   i = search_state->single_state_cnt[dir][mode_offset];
4219   for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
4220     state_s[j] = state_s[j - 1];
4221   state_s[j] = this_state_s;
4222   search_state->single_state_cnt[dir][mode_offset]++;
4223 
4224   // Modelled rd
4225   int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
4226   for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4227     const int64_t rd =
4228         search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
4229     if (rd < modelled_rd) modelled_rd = rd;
4230   }
4231 
4232   // Insertion sort of single_state_modelled
4233   const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
4234   SingleInterModeState *state_m =
4235       search_state->single_state_modelled[dir][mode_offset];
4236   i = search_state->single_state_modelled_cnt[dir][mode_offset];
4237   for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
4238     state_m[j] = state_m[j - 1];
4239   state_m[j] = this_state_m;
4240   search_state->single_state_modelled_cnt[dir][mode_offset]++;
4241 }
4242 
// Analyzes the collected single-reference rd stats:
// (1) marks as invalid the non-best entries whose rd is much worse than the
//     best NEWMV/GLOBALMV rd of the same direction, and
// (2) builds per-(direction, mode) ordered reference frame lists (simple-rd
//     order first, then modelled-rd order) consumed by compound pruning.
static AOM_INLINE void analyze_single_states(
    const AV1_COMP *cpi, InterModeSearchState *search_state) {
  const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
  assert(prune_level >= 1);
  int i, j, dir, mode;

  for (dir = 0; dir < 2; ++dir) {
    int64_t best_rd;
    SingleInterModeState(*state)[FWD_REFS];
    // An entry is pruned when rd * prune_factor / 8 exceeds best_rd, so the
    // larger factor (higher prune levels) prunes more aggressively.
    const int prune_factor = prune_level >= 2 ? 6 : 5;

    // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
    // reference frames for all the modes (NEARESTMV and NEARMV may not
    // have same motion vectors). Always keep the best of each mode
    // because it might form the best possible combination with other mode.
    state = search_state->single_state[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      // Start at i = 1 so the best entry of each mode is never invalidated.
      for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }

    // Apply the same pruning to the modelled-rd based states.
    state = search_state->single_state_modelled[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }
  }

  // Ordering by simple rd first, then by modelled rd
  for (dir = 0; dir < 2; ++dir) {
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
      const int state_cnt_m =
          search_state->single_state_modelled_cnt[dir][mode];
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
      SingleInterModeState *state_m =
          search_state->single_state_modelled[dir][mode];
      int count = 0;
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
      // First take the valid entries in simple-rd order.
      for (i = 0; i < state_cnt_s; ++i) {
        if (state_s[i].rd == INT64_MAX) break;
        if (state_s[i].valid) {
          search_state->single_rd_order[dir][mode][count++] =
              state_s[i].ref_frame;
        }
      }
      if (count >= max_candidates) continue;

      // Then top up with valid modelled-rd entries not already present.
      for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
        if (state_m[i].rd == INT64_MAX) break;
        if (!state_m[i].valid) continue;
        const int ref_frame = state_m[i].ref_frame;
        int match = 0;
        // Check if existing already
        for (j = 0; j < count; ++j) {
          if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
            match = 1;
            break;
          }
        }
        if (match) continue;
        // Check if this ref_frame is removed in simple rd
        int valid = 1;
        for (j = 0; j < state_cnt_s; ++j) {
          if (ref_frame == state_s[j].ref_frame) {
            valid = state_s[j].valid;
            break;
          }
        }
        if (valid) {
          search_state->single_rd_order[dir][mode][count++] = ref_frame;
        }
      }
    }
  }
}
4331 
compound_skip_get_candidates(const AV1_COMP * cpi,const InterModeSearchState * search_state,const int dir,const PREDICTION_MODE mode)4332 static int compound_skip_get_candidates(
4333     const AV1_COMP *cpi, const InterModeSearchState *search_state,
4334     const int dir, const PREDICTION_MODE mode) {
4335   const int mode_offset = INTER_OFFSET(mode);
4336   const SingleInterModeState *state =
4337       search_state->single_state[dir][mode_offset];
4338   const SingleInterModeState *state_modelled =
4339       search_state->single_state_modelled[dir][mode_offset];
4340 
4341   int max_candidates = 0;
4342   for (int i = 0; i < FWD_REFS; ++i) {
4343     if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
4344     max_candidates++;
4345   }
4346 
4347   int candidates = max_candidates;
4348   if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
4349     candidates = AOMMIN(2, max_candidates);
4350   }
4351   if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
4352     if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
4353         state[0].ref_frame == state_modelled[0].ref_frame)
4354       candidates = 1;
4355     if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
4356   }
4357 
4358   if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
4359     // Limit the number of candidates to 1 in each direction for compound
4360     // prediction
4361     candidates = AOMMIN(1, candidates);
4362   }
4363   return candidates;
4364 }
4365 
// Decides whether the compound mode `this_mode` with references {ref_frame,
// second_ref_frame} can be skipped based on single-reference search results.
// Returns 1 to skip the compound mode, 0 to keep searching it.
static int compound_skip_by_single_states(
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
  // Component single-reference modes of the compound mode.
  const int mode[2] = { compound_ref0_mode(this_mode),
                        compound_ref1_mode(this_mode) };
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
  int ref_searched[2] = { 0, 0 };
  int ref_mv_match[2] = { 1, 1 };
  int i, j;

  // Determine whether each component (mode, ref) pair was evaluated during
  // the single-reference search.
  for (i = 0; i < 2; ++i) {
    const SingleInterModeState *state =
        search_state->single_state[mode_dir[i]][mode_offset[i]];
    const int state_cnt =
        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
    for (j = 0; j < state_cnt; ++j) {
      if (state[j].ref_frame == refs[i]) {
        ref_searched[i] = 1;
        break;
      }
    }
  }

  // For NEARESTMV/NEARMV components, single-ref stats are only a valid proxy
  // if the single and compound modes resolve to identical MVs for every
  // ref_mv candidate.
  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
      continue;
    }
    const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
      int_mv single_mv;
      int_mv comp_mv;
      get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
                  &x->mbmi_ext);
      get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
      if (single_mv.as_int != comp_mv.as_int) {
        ref_mv_match[i] = 0;
        break;
      }
    }
  }

  // Skip the compound mode if either component's reference is absent from
  // the top single-reference candidates for that component mode.
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || !ref_mv_match[i]) continue;
    const int candidates =
        compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
    const MV_REFERENCE_FRAME *ref_order =
        search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
    int match = 0;
    for (j = 0; j < candidates; ++j) {
      if (refs[i] == ref_order[j]) {
        match = 1;
        break;
      }
    }
    if (!match) return 1;
  }

  return 0;
}
4430 
4431 // Check if ref frames of current block matches with given block.
match_ref_frame(const MB_MODE_INFO * const mbmi,const MV_REFERENCE_FRAME * ref_frames,int * const is_ref_match)4432 static INLINE void match_ref_frame(const MB_MODE_INFO *const mbmi,
4433                                    const MV_REFERENCE_FRAME *ref_frames,
4434                                    int *const is_ref_match) {
4435   if (is_inter_block(mbmi)) {
4436     is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
4437     is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
4438     if (has_second_ref(mbmi)) {
4439       is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
4440       is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
4441     }
4442   }
4443 }
4444 
4445 // Prune compound mode using ref frames of neighbor blocks.
compound_skip_using_neighbor_refs(MACROBLOCKD * const xd,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,int prune_compound_using_neighbors)4446 static INLINE int compound_skip_using_neighbor_refs(
4447     MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
4448     const MV_REFERENCE_FRAME *ref_frames, int prune_compound_using_neighbors) {
4449   // Exclude non-extended compound modes from pruning
4450   if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4451       this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4452     return 0;
4453 
4454   int is_ref_match[2] = { 0 };  // 0 - match for forward refs
4455                                 // 1 - match for backward refs
4456   // Check if ref frames of this block matches with left neighbor.
4457   if (xd->left_available)
4458     match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
4459 
4460   // Check if ref frames of this block matches with above neighbor.
4461   if (xd->up_available)
4462     match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
4463 
4464   // Combine ref frame match with neighbors in forward and backward refs.
4465   const int track_ref_match = is_ref_match[0] + is_ref_match[1];
4466 
4467   // Pruning based on ref frame match with neighbors.
4468   if (track_ref_match >= prune_compound_using_neighbors) return 0;
4469   return 1;
4470 }
4471 
4472 // Update best single mode for the given reference frame based on simple rd.
update_best_single_mode(InterModeSearchState * search_state,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME ref_frame,int64_t this_rd)4473 static INLINE void update_best_single_mode(InterModeSearchState *search_state,
4474                                            const PREDICTION_MODE this_mode,
4475                                            const MV_REFERENCE_FRAME ref_frame,
4476                                            int64_t this_rd) {
4477   if (this_rd < search_state->best_single_rd[ref_frame]) {
4478     search_state->best_single_rd[ref_frame] = this_rd;
4479     search_state->best_single_mode[ref_frame] = this_mode;
4480   }
4481 }
4482 
4483 // Prune compound mode using best single mode for the same reference.
skip_compound_using_best_single_mode_ref(const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,const PREDICTION_MODE * best_single_mode,int prune_comp_using_best_single_mode_ref)4484 static INLINE int skip_compound_using_best_single_mode_ref(
4485     const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
4486     const PREDICTION_MODE *best_single_mode,
4487     int prune_comp_using_best_single_mode_ref) {
4488   // Exclude non-extended compound modes from pruning
4489   if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4490       this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4491     return 0;
4492 
4493   assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
4494   const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
4495   // Get ref frame direction corresponding to NEWMV
4496   // 0 - NEWMV corresponding to forward direction
4497   // 1 - NEWMV corresponding to backward direction
4498   const int newmv_dir = comp_mode_ref0 != NEWMV;
4499 
4500   // Avoid pruning the compound mode when ref frame corresponding to NEWMV
4501   // have NEWMV as single mode winner.
4502   // Example: For an extended-compound mode,
4503   // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
4504   // - Ref frame corresponding to NEWMV is ALTREF_FRAME
4505   // - Avoid pruning this mode, if best single mode corresponding to ref frame
4506   //   ALTREF_FRAME is NEWMV
4507   const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
4508   if (single_mode == NEWMV) return 0;
4509 
4510   // Avoid pruning the compound mode when best single mode is not available
4511   if (prune_comp_using_best_single_mode_ref == 1)
4512     if (single_mode == MB_MODE_COUNT) return 0;
4513   return 1;
4514 }
4515 
// qsort comparator ordering int64_t values ascending.
static int compare_int64(const void *a, const void *b) {
  const int64_t lhs = *(const int64_t *)a;
  const int64_t rhs = *(const int64_t *)b;
  // Sign-difference idiom: yields -1, 0 or 1 without overflow risk.
  return (lhs > rhs) - (lhs < rhs);
}
4527 
// Commits a new best mode: copies its rd stats, mbmi and skip information
// into search_state / best_rd_stats_dst, and records the transform-block
// skip and tx-type maps into ctx for reuse at encode time.
static INLINE void update_search_state(
    InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
    PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
    const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
    THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  // skip_txfm is only honored for inter winners; intra modes never skip.
  const int skip_txfm =
      mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);
  const TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  search_state->best_rd = new_best_rd_stats->rdcost;
  search_state->best_mode_index = new_best_mode;
  *best_rd_stats_dst = *new_best_rd_stats;
  search_state->best_mbmode = *mbmi;
  search_state->best_skip2 = skip_txfm;
  search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
  // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
  // rate_uv because av1_txfm_search process is replaced by rd estimation.
  // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
  // These two values will be updated when av1_txfm_search is called.
  if (txfm_search_done) {
    // Fold the skip-txfm signaling cost into the luma rate.
    search_state->best_rate_y =
        new_best_rd_stats_y->rate +
        x->mode_costs.skip_txfm_cost[skip_ctx]
                                    [new_best_rd_stats->skip_txfm || skip_txfm];
    search_state->best_rate_uv = new_best_rd_stats_uv->rate;
  }
  search_state->best_y_rdcost = *new_best_rd_stats_y;
  memcpy(ctx->blk_skip, txfm_info->blk_skip,
         sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
4562 
4563 // Find the best RD for a reference frame (among single reference modes)
4564 // and store +10% of it in the 0-th element in ref_frame_rd.
find_top_ref(int64_t ref_frame_rd[REF_FRAMES])4565 static AOM_INLINE void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
4566   assert(ref_frame_rd[0] == INT64_MAX);
4567   int64_t ref_copy[REF_FRAMES - 1];
4568   memcpy(ref_copy, ref_frame_rd + 1,
4569          sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
4570   qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
4571 
4572   int64_t cutoff = ref_copy[0];
4573   // The cut-off is within 10% of the best.
4574   if (cutoff != INT64_MAX) {
4575     assert(cutoff < INT64_MAX / 200);
4576     cutoff = (110 * cutoff) / 100;
4577   }
4578   ref_frame_rd[0] = cutoff;
4579 }
4580 
4581 // Check if either frame is within the cutoff.
in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],MV_REFERENCE_FRAME frame1,MV_REFERENCE_FRAME frame2)4582 static INLINE bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
4583                                         MV_REFERENCE_FRAME frame1,
4584                                         MV_REFERENCE_FRAME frame2) {
4585   assert(frame2 > 0);
4586   return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
4587          ref_frame_rd[frame2] <= ref_frame_rd[0];
4588 }
4589 
// Re-runs the motion mode search (motion_mode_rd) for each stored winner
// candidate (single-reference modes only) and updates the overall best mode
// in search_state when a candidate improves on it.
static AOM_INLINE void evaluate_motion_mode_for_winner_candidates(
    const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
    HandleInterModeArgs *const args, TileDataEnc *const tile_data,
    PICK_MODE_CONTEXT *const ctx,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    const motion_mode_best_st_candidate *const best_motion_mode_cands,
    int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
    InterModeSearchState *const search_state, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *const inter_modes_info = x->inter_modes_info;
  const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;

  for (int cand = 0; cand < num_best_cand; cand++) {
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    av1_init_rd_stats(&rd_stats);
    av1_init_rd_stats(&rd_stats_y);
    av1_init_rd_stats(&rd_stats_uv);
    int rate_mv;

    // Restore the candidate's mode info and rates saved during the first
    // pass of the mode search.
    rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
    args->skip_motion_mode =
        best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
    *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
    rd_stats.rate =
        best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;

    // Continue if the best candidate is compound.
    if (!is_inter_singleref_mode(mbmi->mode)) continue;

    x->txfm_search_info.skip_txfm = 0;
    // Preserve the current destination buffers for prediction.
    struct macroblockd_plane *p = xd->plane;
    const BUFFER_SET orig_dst = {
      { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
      { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
    };

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    // Initialize motion mode to simple translation
    // Calculation of switchable rate depends on it.
    mbmi->motion_mode = 0;
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    // Point the prediction planes at the candidate's reference buffers.
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    int64_t skip_rd[2] = { search_state->best_skip_rd[0],
                           search_state->best_skip_rd[1] };
    int64_t this_yrd = INT64_MAX;
    int64_t ret_value = motion_mode_rd(
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
        search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
        do_tx_search, inter_modes_info, 1, &this_yrd);

    // ret_value == INT64_MAX means the candidate was pruned inside
    // motion_mode_rd.
    if (ret_value != INT64_MAX) {
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(
          &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
          mode_enum, NULL, bsize, rd_stats.rdcost,
          cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
      if (rd_stats.rdcost < search_state->best_rd) {
        *yrd = this_yrd;
        update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                            &rd_stats_uv, mode_enum, x, do_tx_search);
        if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
      }
    }
  }
}
4667 
/*!\cond */
// Arguments for speed feature pruning of inter mode search
typedef struct {
  // Output flag: set when a mode should keep simple translation but skip
  // the motion-mode refinement search.
  int *skip_motion_mode;
  // Per-mode/ref-frame skip mask consulted by the order-independent pruning.
  mode_skip_mask_t *mode_skip_mask;
  // Shared state of the inter mode search (best RD so far, thresholds, etc.).
  InterModeSearchState *search_state;
  // Bitmask of reference frames to skip (refs never picked by square blocks).
  int skip_ref_frame_mask;
  // Set once the first compound mode is reached and single-ref stats have
  // been analyzed for compound pruning.
  int reach_first_comp_mode;
  // Multiplier (in MODE_THRESH_QBITS fixed point) applied to the mode
  // threshold when the best mode so far is skippable.
  int mode_thresh_mul_fact;
  // Intra mode candidates deferred to a later dedicated intra search loop.
  int intra_mode_idx_ls[INTRA_MODES];
  // Number of valid entries in intra_mode_idx_ls.
  int intra_mode_num;
  // Count of single-reference modes evaluated so far; compared against
  // NUM_SINGLE_REF_MODES before computing single-ref cut-off stats.
  int num_single_modes_processed;
  // Set once find_top_ref() has prepared the single-ref cut-off in
  // ref_frame_rd[0].
  int prune_cpd_using_sr_stats_ready;
} InterModeSFArgs;
/*!\endcond */
4683 
// Decides whether the mode at search index 'midx' can be pruned before the
// full RD evaluation. Returns 1 when the mode should be skipped, 0 otherwise.
// Intra modes are never evaluated here: they are queued in
// args->intra_mode_idx_ls for a later dedicated loop (and 1 is returned).
// As a side effect, *args->skip_motion_mode is set when the mode should be
// kept but its motion-mode refinement skipped.
static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
                           int64_t *ref_frame_rd, int midx,
                           InterModeSFArgs *args) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  // Resolve the prediction mode definition for this search index.
  const THR_MODES mode_enum = av1_default_mode_order[midx];
  const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
  const PREDICTION_MODE this_mode = mode_def->mode;
  const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
  const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
  const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
  const int comp_pred = second_ref_frame > INTRA_FRAME;

  // Drop modes that are incompatible with the current frame outright.
  if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
    return 1;

  // Order-independent pruning: a result of 1 means skip the mode entirely;
  // 2 means keep the mode but skip its motion-mode search.
  const int order_skip = inter_mode_search_order_independent_skip(
      cpi, x, args->mode_skip_mask, args->search_state,
      args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
  if (order_skip == 1) return 1;
  *(args->skip_motion_mode) = (order_skip == 2);

  // On reaching the first compound mode, harvest stats from the
  // single-reference predictors so later compound modes can be pruned.
  if (comp_pred && sf->inter_sf.prune_comp_search_by_single_result > 0 &&
      args->reach_first_comp_mode == 0) {
    analyze_single_states(cpi, args->search_state);
    args->reach_first_comp_mode = 1;
  }

  // Tighten the per-mode RD threshold when the best mode so far is skippable.
  const int thresh_scale = args->search_state->best_mode_skippable
                               ? args->mode_thresh_mul_fact
                               : (1 << MODE_THRESH_QBITS);
  const int64_t mode_threshold =
      (args->search_state->mode_threshold[mode_enum] * thresh_scale) >>
      MODE_THRESH_QBITS;
  if (args->search_state->best_rd < mode_threshold) return 1;

  // Skip this compound mode based on the RD results from the single
  // prediction modes.
  if (comp_pred && sf->inter_sf.prune_comp_search_by_single_result > 0 &&
      compound_skip_by_single_states(cpi, args->search_state, this_mode,
                                     ref_frame, second_ref_frame, x))
    return 1;

  if (ref_frame == INTRA_FRAME) {
    // Speed features that disable particular intra predictors.
    const int is_smooth_mode = mbmi->mode == SMOOTH_PRED ||
                               mbmi->mode == SMOOTH_H_PRED ||
                               mbmi->mode == SMOOTH_V_PRED;
    if (is_smooth_mode && (!cpi->oxcf.intra_mode_cfg.enable_smooth_intra ||
                           sf->intra_sf.disable_smooth_intra))
      return 1;
    if (mbmi->mode == PAETH_PRED &&
        !cpi->oxcf.intra_mode_cfg.enable_paeth_intra)
      return 1;

    // Intra modes will be handled in another loop later.
    assert(args->intra_mode_num < INTRA_MODES);
    args->intra_mode_idx_ls[args->intra_mode_num++] = mode_enum;
    return 1;
  }

  if (comp_pred && sf->inter_sf.prune_compound_using_single_ref) {
    // After we are done with single reference modes, find the 2nd best RD
    // for a reference frame. Only search compound modes that have a reference
    // frame at least as good as the 2nd best.
    if (!args->prune_cpd_using_sr_stats_ready &&
        args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
      find_top_ref(ref_frame_rd);
      args->prune_cpd_using_sr_stats_ready = 1;
    }
    if (args->prune_cpd_using_sr_stats_ready &&
        !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
      return 1;
  }

  if (comp_pred && sf->inter_sf.prune_compound_using_neighbors &&
      compound_skip_using_neighbor_refs(
          xd, this_mode, ref_frames,
          sf->inter_sf.prune_compound_using_neighbors))
    return 1;

  if (comp_pred && sf->inter_sf.prune_comp_using_best_single_mode_ref &&
      skip_compound_using_best_single_mode_ref(
          this_mode, ref_frames, args->search_state->best_single_mode,
          sf->inter_sf.prune_comp_using_best_single_mode_ref))
    return 1;

  return 0;
}
4782 
// Updates the best RD trackers for the three reference-mode signaling
// choices (single, compound, select). 'compmode_cost' is the rate of the
// compound-mode flag; depending on the frame's reference_mode it is either
// already included in rd_stats->rate or must be added for the hybrid case.
static void record_best_compound(REFERENCE_MODE reference_mode,
                                 RD_STATS *rd_stats, int comp_pred, int rdmult,
                                 InterModeSearchState *search_state,
                                 int compmode_cost) {
  // Derive the rate with and without the compound-mode signaling bit.
  int64_t single_rate, hybrid_rate;
  if (reference_mode == REFERENCE_MODE_SELECT) {
    single_rate = rd_stats->rate - compmode_cost;
    hybrid_rate = rd_stats->rate;
  } else {
    single_rate = rd_stats->rate;
    hybrid_rate = rd_stats->rate + compmode_cost;
  }

  const int64_t single_rd = RDCOST(rdmult, single_rate, rd_stats->dist);
  const int64_t hybrid_rd = RDCOST(rdmult, hybrid_rate, rd_stats->dist);

  // The non-hybrid RD is credited to whichever reference type was used.
  const REFERENCE_MODE pred_type =
      comp_pred ? COMPOUND_REFERENCE : SINGLE_REFERENCE;
  if (single_rd < search_state->best_pred_rd[pred_type])
    search_state->best_pred_rd[pred_type] = single_rd;
  if (hybrid_rd < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
    search_state->best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd;
}
4810 
// Does a transform search over a list of the best inter mode candidates.
// This is called if the original mode search computed an RD estimate
// for the transform search rather than doing a full search.
// On return, search_state/rd_cost/ctx hold the winning mode (if any) and
// *yrd holds the luma-only RD of the best candidate found here.
static void tx_search_best_inter_candidates(
    AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    int64_t best_rd_so_far, BLOCK_SIZE bsize,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
    InterModeSearchState *search_state, RD_STATS *rd_cost,
    PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const ModeCosts *mode_costs = &x->mode_costs;
  const int num_planes = av1_num_planes(cm);
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *inter_modes_info = x->inter_modes_info;
  // Sort the candidates so the most promising (lowest estimated RD) come
  // first.
  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
  search_state->best_rd = best_rd_so_far;
  search_state->best_mode_index = THR_INVALID;
  // Initialize best mode stats for winner mode processing
  x->winner_mode_count = 0;
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
                          NULL, bsize, best_rd_so_far,
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
  // Cap the number of candidates per the real-time speed feature.
  inter_modes_info->num =
      inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
          ? inter_modes_info->num
          : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
  // Estimated RD of the best candidate; used below to early-terminate once
  // estimates get clearly worse.
  const int64_t top_est_rd =
      inter_modes_info->num > 0
          ? inter_modes_info
                ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
          : INT64_MAX;
  *yrd = INT64_MAX;
  int64_t best_rd_in_this_partition = INT64_MAX;
  // Iterate over best inter mode candidates and perform tx search
  for (int j = 0; j < inter_modes_info->num; ++j) {
    const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
    int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
    // Candidates are sorted, so once one is >25% worse than the top
    // estimate, all remaining ones are too: stop.
    if (curr_est_rd * 0.80 > top_est_rd) break;

    txfm_info->skip_txfm = 0;
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Select prediction reference frames.
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    // Build the prediction for this mode
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                  av1_num_planes(cm) - 1);
    if (mbmi->motion_mode == OBMC_CAUSAL) {
      av1_build_obmc_inter_predictors_sb(cm, xd);
    }

    // Initialize RD stats
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
    int64_t skip_rd = INT64_MAX;
    if (cpi->sf.inter_sf.txfm_rd_gate_level) {
      // Check if the mode is good enough based on skip RD
      int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
      skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
      int eval_txfm =
          check_txfm_eval(x, bsize, search_state->best_skip_rd[0], skip_rd,
                          cpi->sf.inter_sf.txfm_rd_gate_level, 0);
      if (!eval_txfm) continue;
    }

    int64_t this_yrd = INT64_MAX;
    // Do the transform search
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
                         mode_rate, search_state->best_rd)) {
      continue;
    } else {
      // Luma-only RD: token rate (or skip-flag rate when the whole block is
      // coded as skip) plus the mode rate.
      const int y_rate =
          rd_stats.skip_txfm
              ? mode_costs->skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);

      // Feed the observed rate/distortion back into the inter mode RD model.
      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
            rd_stats_y.rate + rd_stats_uv.rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }
    rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
    // Track the luma RD of the overall best candidate for the caller.
    if (rd_stats.rdcost < best_rd_in_this_partition) {
      best_rd_in_this_partition = rd_stats.rdcost;
      *yrd = this_yrd;
    }

    const THR_MODES mode_enum = get_prediction_mode_idx(
        mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Collect mode stats for multiwinner mode processing
    const int txfm_search_done = 1;
    store_winner_mode_stats(
        &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
        NULL, bsize, rd_stats.rdcost,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);

    if (rd_stats.rdcost < search_state->best_rd) {
      update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                          &rd_stats_uv, mode_enum, x, txfm_search_done);
      search_state->best_skip_rd[0] = skip_rd;
    }
  }
}
4929 
// Indicates number of winner simple translation modes to be used
// Indexed by the motion_mode_for_winner_cand speed feature level.
static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
4932 
// Adds a motion mode to the candidate list for motion_mode_for_winner_cand
// speed feature. This list consists of modes that have only searched
// SIMPLE_TRANSLATION. The final list will be used to search other motion
// modes after the initial RD search.
// The list is kept sorted by ascending rd_cost and capped at
// max_winner_motion_mode_cand entries; candidates worse than every entry in
// a full list are dropped.
static void handle_winner_cand(
    MB_MODE_INFO *const mbmi,
    motion_mode_best_st_candidate *best_motion_mode_cands,
    int max_winner_motion_mode_cand, int64_t this_rd,
    motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
  // Number of current motion mode candidates in list
  const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
  int valid_motion_mode_cand_loc = num_motion_mode_cand;

  // find the best location to insert new motion mode candidate
  // (first entry whose rd_cost exceeds this_rd, since the list is sorted)
  for (int j = 0; j < num_motion_mode_cand; j++) {
    if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
      valid_motion_mode_cand_loc = j;
      break;
    }
  }

  // Insert motion mode if location is found
  if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
    // Shift existing entries one slot down to open the insertion point; the
    // AOMMIN bound ensures the tail entry falls off a full list instead of
    // writing past the array.
    if (num_motion_mode_cand > 0 &&
        valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
      memmove(
          &best_motion_mode_cands
               ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
          &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
          (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
           valid_motion_mode_cand_loc) *
              sizeof(best_motion_mode_cands->motion_mode_cand[0]));
    // Fill in the candidate's data before copying it into the list slot.
    motion_mode_cand->mbmi = *mbmi;
    motion_mode_cand->rd_cost = this_rd;
    motion_mode_cand->skip_motion_mode = skip_motion_mode;
    best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
        *motion_mode_cand;
    best_motion_mode_cands->num_motion_mode_cand =
        AOMMIN(max_winner_motion_mode_cand,
               best_motion_mode_cands->num_motion_mode_cand + 1);
  }
}
4975 
4976 /*!\brief Search intra modes in interframes
4977  *
4978  * \ingroup intra_mode_search
4979  *
4980  * This function searches for the best intra mode when the current frame is an
4981  * interframe. The list of luma intra mode candidates to be searched are stored
4982  * in InterModeSFArgs::intra_mode_idx_ls. This function however does *not*
4983  * handle luma palette mode. Palette mode is currently handled by \ref
4984  * av1_search_palette_mode.
4985  *
 * This function will first iterate through the luma mode candidates to find
 * the best luma intra mode. Once the best luma mode is found, it will then
 * search for the best chroma mode. Because palette mode is currently not
 * handled here, a cache of uv mode is stored in
 * InterModeSearchState::intra_search_state so it can be reused later by \ref
 * av1_search_palette_mode.
4992  *
 * \remark If a best intra mode is found, the corresponding values in
 * x->e_mbd.mi[0], rd_cost, and search_state are updated. Moreover, in the
 * first invocation of the function, the chroma intra mode result is cached in
 * intra_search_state to be used in subsequent calls. In the first evaluation
 * with directional mode, a prune_mask computed with histogram of gradient is
 * also stored in intra_search_state.
5000  *
5001  * \param[in,out] search_state      Struct keep track of the prediction mode
5002  *                                  search state in interframe.
5003  *
5004  * \param[in]     cpi               Top-level encoder structure.
5005  * \param[in]     x                 Pointer to struct holding all the data for
5006  *                                  the current prediction block.
5007  * \param[out]    rd_cost           Stores the best rd_cost among all the
5008  *                                  prediction modes searched.
5009  * \param[in]     bsize             Current block size.
5010  * \param[in,out] ctx               Structure to hold the number of 4x4 blks to
5011  *                                  copy the tx_type and txfm_skip arrays.
5012  *                                  for only the Y plane.
5013  * \param[in,out] sf_args           Stores the list of intra mode candidates
5014  *                                  to be searched.
5015  * \param[in]     intra_ref_frame_cost  The entropy cost for signaling that the
5016  *                                      current ref frame is an intra frame.
5017  * \param[in]     yrd_threshold     The rdcost threshold for luma intra mode to
5018  *                                  terminate chroma intra mode search.
5019  *
 * \remark This function returns void; the winning intra mode, if any, is
 * recorded through search_state, rd_cost, and ctx.
5023  */
static AOM_INLINE void search_intra_modes_in_interframe(
    InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
    InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
    int64_t yrd_threshold) {
  const AV1_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  IntraModeSearchState *intra_search_state = &search_state->intra_search_state;

  // Trackers for the best luma intra mode found in the loop below.
  int is_best_y_mode_intra = 0;
  RD_STATS best_intra_rd_stats_y;
  int64_t best_rd_y = INT64_MAX;
  int best_mode_cost_y = -1;
  MB_MODE_INFO best_mbmi = *xd->mi[0];
  THR_MODES best_mode_enum = THR_INVALID;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int num_4x4 = bsize_to_num_blk(bsize);

  // Performs luma search
  for (int j = 0; j < sf_args->intra_mode_num; ++j) {
    // Stop early if a speed feature decided intra is not worth evaluating.
    if (sf->intra_sf.skip_intra_in_interframe &&
        search_state->intra_search_state.skip_intra_modes)
      break;
    const THR_MODES mode_enum = sf_args->intra_mode_idx_ls[j];
    const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
    const PREDICTION_MODE this_mode = mode_def->mode;

    assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
    assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
    init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
    x->txfm_search_info.skip_txfm = 0;

    if (this_mode != DC_PRED) {
      // Only search the oblique modes if the best so far is
      // one of the neighboring directional modes
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
          (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
        if (search_state->best_mode_index != THR_INVALID &&
            search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
          continue;
      }
      if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(
                this_mode, search_state->intra_search_state.best_intra_mode))
          continue;
      }
    }

    // Evaluate this luma mode and keep it if it beats the current best and
    // the caller-supplied luma RD threshold.
    RD_STATS intra_rd_stats_y;
    int mode_cost_y;
    int64_t intra_rd_y = INT64_MAX;
    const int is_luma_result_valid = av1_handle_intra_y_mode(
        intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
        &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y);
    if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
      is_best_y_mode_intra = 1;
      if (intra_rd_y < best_rd_y) {
        best_intra_rd_stats_y = intra_rd_stats_y;
        best_mode_cost_y = mode_cost_y;
        best_rd_y = intra_rd_y;
        best_mbmi = *mbmi;
        best_mode_enum = mode_enum;
        // Snapshot the per-4x4 skip flags and tx types of the winner so they
        // can be restored after the loop clobbers them.
        memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
               sizeof(best_blk_skip[0]) * num_4x4);
        av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
      }
    }
  }

  // No luma intra mode survived the threshold: nothing to do.
  if (!is_best_y_mode_intra) {
    return;
  }

  assert(best_rd_y < INT64_MAX);

  // Restores the best luma mode
  *mbmi = best_mbmi;
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * num_4x4);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);

  // Performs chroma search
  RD_STATS intra_rd_stats, intra_rd_stats_uv;
  av1_init_rd_stats(&intra_rd_stats);
  av1_init_rd_stats(&intra_rd_stats_uv);
  const int num_planes = av1_num_planes(cm);
  if (num_planes > 1) {
    // The uv result is cached in intra_search_state on the first call so
    // later invocations (and palette search) can reuse it.
    const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
        intra_search_state, cpi, x, bsize, &intra_rd_stats,
        &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);

    if (!intra_uv_mode_valid) {
      return;
    }
  }

  // Merge the luma and chroma rd stats
  assert(best_mode_cost_y >= 0);
  intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
    // (prediction granularity), so we account for it in the full rate,
    // not the tokenonly rate.
    best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
  }

  const ModeCosts *mode_costs = &x->mode_costs;
  const PREDICTION_MODE mode = mbmi->mode;
  if (num_planes > 1 && xd->is_chroma_ref) {
    // Add the chroma mode signaling and token costs.
    const int uv_mode_cost =
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
    intra_rd_stats.rate +=
        intra_rd_stats_uv.rate +
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
  }
  if (mode != DC_PRED && mode != PAETH_PRED) {
    const int intra_cost_penalty = av1_get_intra_cost_penalty(
        cm->quant_params.base_qindex, cm->quant_params.y_dc_delta_q,
        cm->seq_params.bit_depth);
    intra_rd_stats.rate += intra_cost_penalty;
  }

  // Intra block is always coded as non-skip
  intra_rd_stats.skip_txfm = 0;
  intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
  // Add in the cost of the no skip flag.
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
  // Calculate the final RD estimate for this mode.
  const int64_t this_rd =
      RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
  // Keep record of best intra rd
  if (this_rd < search_state->best_intra_rd) {
    search_state->best_intra_rd = this_rd;
    intra_search_state->best_intra_mode = mode;
  }

  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] =
        AOMMIN(search_state->best_pred_rd[i], this_rd);
  }

  intra_rd_stats.rdcost = this_rd;

  // Collect mode stats for multiwinner mode processing
  const int txfm_search_done = 1;
  store_winner_mode_stats(
      &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
      &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
      cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
  if (intra_rd_stats.rdcost < search_state->best_rd) {
    update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
                        &best_intra_rd_stats_y, &intra_rd_stats_uv,
                        best_mode_enum, x, txfm_search_done);
  }
}
5184 
5185 // TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb.
av1_rd_pick_inter_mode(struct AV1_COMP * cpi,struct TileDataEnc * tile_data,struct macroblock * x,struct RD_STATS * rd_cost,BLOCK_SIZE bsize,PICK_MODE_CONTEXT * ctx,int64_t best_rd_so_far)5186 void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
5187                             struct macroblock *x, struct RD_STATS *rd_cost,
5188                             BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
5189                             int64_t best_rd_so_far) {
5190   AV1_COMMON *const cm = &cpi->common;
5191   const FeatureFlags *const features = &cm->features;
5192   const int num_planes = av1_num_planes(cm);
5193   const SPEED_FEATURES *const sf = &cpi->sf;
5194   MACROBLOCKD *const xd = &x->e_mbd;
5195   MB_MODE_INFO *const mbmi = xd->mi[0];
5196   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
5197   int i;
5198   const ModeCosts *mode_costs = &x->mode_costs;
5199   const int *comp_inter_cost =
5200       mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
5201 
5202   InterModeSearchState search_state;
5203   init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
5204   INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
5205     INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
5206     INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
5207   };
5208   HandleInterModeArgs args = { { NULL },
5209                                { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
5210                                { NULL },
5211                                { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
5212                                  MAX_SB_SIZE >> 1 },
5213                                NULL,
5214                                NULL,
5215                                NULL,
5216                                search_state.modelled_rd,
5217                                INT_MAX,
5218                                INT_MAX,
5219                                search_state.simple_rd,
5220                                0,
5221                                interintra_modes,
5222                                { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
5223                                0,
5224                                -1,
5225                                -1,
5226                                -1,
5227                                { 0 } };
5228   for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
5229   // Indicates the appropriate number of simple translation winner modes for
5230   // exhaustive motion mode evaluation
5231   const int max_winner_motion_mode_cand =
5232       num_winner_motion_modes[cpi->sf.winner_mode_sf
5233                                   .motion_mode_for_winner_cand];
5234   assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
5235   motion_mode_candidate motion_mode_cand;
5236   motion_mode_best_st_candidate best_motion_mode_cands;
5237   // Initializing the number of motion mode candidates to zero.
5238   best_motion_mode_cands.num_motion_mode_cand = 0;
5239   for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
5240     best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;
5241 
5242   for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
5243 
5244   av1_invalid_rd_stats(rd_cost);
5245 
5246   for (i = 0; i < REF_FRAMES; ++i) {
5247     x->warp_sample_info[i].num = -1;
5248   }
5249 
5250   // Ref frames that are selected by square partition blocks.
5251   int picked_ref_frames_mask = 0;
5252   if (cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions &&
5253       mbmi->partition != PARTITION_NONE) {
5254     // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
5255     // partition blocks. prune_ref_frame_for_rect_partitions >=2
5256     // implies prune for vert, horiz and extended partition blocks.
5257     if ((mbmi->partition != PARTITION_VERT &&
5258          mbmi->partition != PARTITION_HORZ) ||
5259         cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
5260       picked_ref_frames_mask =
5261           fetch_picked_ref_frames_mask(x, bsize, cm->seq_params.mib_size);
5262     }
5263   }
5264 
5265   // Skip ref frames that never selected by square blocks.
5266   const int skip_ref_frame_mask =
5267       picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
5268   mode_skip_mask_t mode_skip_mask;
5269   unsigned int ref_costs_single[REF_FRAMES];
5270   unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
5271   struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
5272   // init params, set frame modes, speed features
5273   set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
5274                                 skip_ref_frame_mask, ref_costs_single,
5275                                 ref_costs_comp, yv12_mb);
5276 
5277   int64_t best_est_rd = INT64_MAX;
5278   const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
5279   // If do_tx_search is 0, only estimated RD should be computed.
5280   // If do_tx_search is 1, all modes have TX search performed.
5281   const int do_tx_search =
5282       !((cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
5283         (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 &&
5284          num_pels_log2_lookup[bsize] > 8) ||
5285         cpi->sf.rt_sf.force_tx_search_off);
5286   InterModesInfo *inter_modes_info = x->inter_modes_info;
5287   inter_modes_info->num = 0;
5288 
5289   // Temporary buffers used by handle_inter_mode().
5290   uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
5291 
5292   // The best RD found for the reference frame, among single reference modes.
5293   // Note that the 0-th element will contain a cut-off that is later used
5294   // to determine if we should skip a compound mode.
5295   int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
5296                                        INT64_MAX, INT64_MAX, INT64_MAX,
5297                                        INT64_MAX, INT64_MAX };
5298 
5299   // Prepared stats used later to check if we could skip intra mode eval.
5300   int64_t inter_cost = -1;
5301   int64_t intra_cost = -1;
5302   // Need to tweak the threshold for hdres speed 0 & 1.
5303   const int mi_row = xd->mi_row;
5304   const int mi_col = xd->mi_col;
5305 
5306   // Obtain the relevant tpl stats for pruning inter modes
5307   PruneInfoFromTpl inter_cost_info_from_tpl;
5308 #if !CONFIG_REALTIME_ONLY
5309   if (cpi->sf.inter_sf.prune_inter_modes_based_on_tpl) {
5310     // x->tpl_keep_ref_frame[id] = 1 => no pruning in
5311     // prune_ref_by_selective_ref_frame()
5312     // x->tpl_keep_ref_frame[id] = 0  => ref frame can be pruned in
5313     // prune_ref_by_selective_ref_frame()
5314     // Populating valid_refs[idx] = 1 ensures that
5315     // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
5316     // pruned ref frame.
5317     int valid_refs[INTER_REFS_PER_FRAME];
5318     for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
5319       const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
5320       valid_refs[frame - 1] =
5321           x->tpl_keep_ref_frame[frame] ||
5322           !prune_ref_by_selective_ref_frame(
5323               cpi, x, refs, cm->cur_frame->ref_display_order_hint);
5324     }
5325     av1_zero(inter_cost_info_from_tpl);
5326     get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
5327                               &inter_cost_info_from_tpl);
5328   }
5329 #endif
5330   const int do_pruning =
5331       (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
5332   if (do_pruning && sf->intra_sf.skip_intra_in_interframe) {
5333     // Only consider full SB.
5334     const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
5335     const int tpl_bsize_1d = cpi->tpl_data.tpl_bsize_1d;
5336     const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
5337                     (block_size_high[sb_size] / tpl_bsize_1d);
5338     SuperBlockEnc *sb_enc = &x->sb_enc;
5339     if (sb_enc->tpl_data_count == len) {
5340       const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
5341       const int tpl_stride = sb_enc->tpl_stride;
5342       const int tplw = mi_size_wide[tpl_bsize];
5343       const int tplh = mi_size_high[tpl_bsize];
5344       const int nw = mi_size_wide[bsize] / tplw;
5345       const int nh = mi_size_high[bsize] / tplh;
5346       if (nw >= 1 && nh >= 1) {
5347         const int of_h = mi_row % mi_size_high[sb_size];
5348         const int of_w = mi_col % mi_size_wide[sb_size];
5349         const int start = of_h / tplh * tpl_stride + of_w / tplw;
5350 
5351         for (int k = 0; k < nh; k++) {
5352           for (int l = 0; l < nw; l++) {
5353             inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
5354             intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
5355           }
5356         }
5357         inter_cost /= nw * nh;
5358         intra_cost /= nw * nh;
5359       }
5360     }
5361   }
5362 
5363   // Initialize best mode stats for winner mode processing
5364   av1_zero(x->winner_mode_stats);
5365   x->winner_mode_count = 0;
5366   store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
5367                           NULL, bsize, best_rd_so_far,
5368                           cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
5369 
5370   int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
5371   if (sf->inter_sf.prune_inter_modes_if_skippable) {
5372     // Higher multiplication factor values for lower quantizers.
5373     mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
5374   }
5375 
5376   // Initialize arguments for mode loop speed features
5377   InterModeSFArgs sf_args = { &args.skip_motion_mode,
5378                               &mode_skip_mask,
5379                               &search_state,
5380                               skip_ref_frame_mask,
5381                               0,
5382                               mode_thresh_mul_fact,
5383                               { 0 },
5384                               0,
5385                               0,
5386                               0 };
5387   int64_t best_inter_yrd = INT64_MAX;
5388 
5389   // This is the main loop of this function. It loops over all possible modes
5390   // and calls handle_inter_mode() to compute the RD for each.
5391   // Here midx is just an iterator index that should not be used by itself
5392   // except to keep track of the number of modes searched. It should be used
5393   // with av1_default_mode_order to get the enum that defines the mode, which
5394   // can be used with av1_mode_defs to get the prediction mode and the ref
5395   // frames.
5396   for (THR_MODES midx = THR_MODE_START; midx < THR_MODE_END; ++midx) {
5397     // Get the actual prediction mode we are trying in this iteration
5398     const THR_MODES mode_enum = av1_default_mode_order[midx];
5399     const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
5400     const PREDICTION_MODE this_mode = mode_def->mode;
5401     const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
5402 
5403     const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
5404     const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
5405     const int is_single_pred =
5406         ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
5407     const int comp_pred = second_ref_frame > INTRA_FRAME;
5408 
5409     init_mbmi(mbmi, this_mode, ref_frames, cm);
5410 
5411     txfm_info->skip_txfm = 0;
5412     sf_args.num_single_modes_processed += is_single_pred;
5413     set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
5414 
5415     // Apply speed features to decide if this inter mode can be skipped
5416     if (skip_inter_mode(cpi, x, bsize, ref_frame_rd, midx, &sf_args)) continue;
5417 
5418     // Select prediction reference frames.
5419     for (i = 0; i < num_planes; i++) {
5420       xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
5421       if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
5422     }
5423 
5424     mbmi->angle_delta[PLANE_TYPE_Y] = 0;
5425     mbmi->angle_delta[PLANE_TYPE_UV] = 0;
5426     mbmi->filter_intra_mode_info.use_filter_intra = 0;
5427     mbmi->ref_mv_idx = 0;
5428 
5429     const int64_t ref_best_rd = search_state.best_rd;
5430     RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
5431     av1_init_rd_stats(&rd_stats);
5432 
5433     const int ref_frame_cost = comp_pred
5434                                    ? ref_costs_comp[ref_frame][second_ref_frame]
5435                                    : ref_costs_single[ref_frame];
5436     const int compmode_cost =
5437         is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
5438     const int real_compmode_cost =
5439         cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
5440             ? compmode_cost
5441             : 0;
5442     // Point to variables that are maintained between loop iterations
5443     args.single_newmv = search_state.single_newmv;
5444     args.single_newmv_rate = search_state.single_newmv_rate;
5445     args.single_newmv_valid = search_state.single_newmv_valid;
5446     args.single_comp_cost = real_compmode_cost;
5447     args.ref_frame_cost = ref_frame_cost;
5448 
5449     int64_t skip_rd[2] = { search_state.best_skip_rd[0],
5450                            search_state.best_skip_rd[1] };
5451     int64_t this_yrd = INT64_MAX;
5452 #if CONFIG_COLLECT_COMPONENT_TIMING
5453     start_timing(cpi, handle_inter_mode_time);
5454 #endif
5455     int64_t this_rd = handle_inter_mode(
5456         cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
5457         ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
5458         inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
5459         &this_yrd);
5460 #if CONFIG_COLLECT_COMPONENT_TIMING
5461     end_timing(cpi, handle_inter_mode_time);
5462 #endif
5463     if (sf->inter_sf.prune_comp_search_by_single_result > 0 &&
5464         is_inter_singleref_mode(this_mode)) {
5465       collect_single_states(x, &search_state, mbmi);
5466     }
5467 
5468     if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
5469         is_inter_singleref_mode(this_mode))
5470       update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);
5471 
5472     if (this_rd == INT64_MAX) continue;
5473 
5474     if (mbmi->skip_txfm) {
5475       rd_stats_y.rate = 0;
5476       rd_stats_uv.rate = 0;
5477     }
5478 
5479     if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
5480         this_rd < ref_frame_rd[ref_frame]) {
5481       ref_frame_rd[ref_frame] = this_rd;
5482     }
5483 
5484     // Did this mode help, i.e., is it the new best mode
5485     if (this_rd < search_state.best_rd) {
5486       assert(IMPLIES(comp_pred,
5487                      cm->current_frame.reference_mode != SINGLE_REFERENCE));
5488       search_state.best_pred_sse = x->pred_sse[ref_frame];
5489       best_inter_yrd = this_yrd;
5490       update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
5491                           &rd_stats_uv, mode_enum, x, do_tx_search);
5492       if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
5493       search_state.best_skip_rd[1] = skip_rd[1];
5494     }
5495     if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
5496       // Add this mode to motion mode candidate list for motion mode search
5497       // if using motion_mode_for_winner_cand speed feature
5498       handle_winner_cand(mbmi, &best_motion_mode_cands,
5499                          max_winner_motion_mode_cand, this_rd,
5500                          &motion_mode_cand, args.skip_motion_mode);
5501     }
5502 
5503     /* keep record of best compound/single-only prediction */
5504     record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
5505                          x->rdmult, &search_state, compmode_cost);
5506   }
5507 
5508 #if CONFIG_COLLECT_COMPONENT_TIMING
5509   start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
5510 #endif
5511   if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
5512     // For the single ref winner candidates, evaluate other motion modes (non
5513     // simple translation).
5514     evaluate_motion_mode_for_winner_candidates(
5515         cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
5516         &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
5517         &search_state, &best_inter_yrd);
5518   }
5519 #if CONFIG_COLLECT_COMPONENT_TIMING
5520   end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
5521 #endif
5522 
5523 #if CONFIG_COLLECT_COMPONENT_TIMING
5524   start_timing(cpi, do_tx_search_time);
5525 #endif
5526   if (do_tx_search != 1) {
5527     // A full tx search has not yet been done, do tx search for
5528     // top mode candidates
5529     tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
5530                                     yv12_mb, mi_row, mi_col, &search_state,
5531                                     rd_cost, ctx, &best_inter_yrd);
5532   }
5533 #if CONFIG_COLLECT_COMPONENT_TIMING
5534   end_timing(cpi, do_tx_search_time);
5535 #endif
5536 
5537 #if CONFIG_COLLECT_COMPONENT_TIMING
5538   start_timing(cpi, handle_intra_mode_time);
5539 #endif
5540   // Gate intra mode evaluation if best of inter is skip except when source
5541   // variance is extremely low
5542   const unsigned int src_var_thresh_intra_skip = 1;
5543   if (sf->intra_sf.skip_intra_in_interframe &&
5544       (x->source_variance > src_var_thresh_intra_skip)) {
5545     if (inter_cost >= 0 && intra_cost >= 0) {
5546       aom_clear_system_state();
5547       const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
5548                                        ? &av1_intrap_nn_config
5549                                        : &av1_intrap_hd_nn_config;
5550       float nn_features[6];
5551       float scores[2] = { 0.0f };
5552       float probs[2] = { 0.0f };
5553 
5554       nn_features[0] = (float)search_state.best_mbmode.skip_txfm;
5555       nn_features[1] = (float)mi_size_wide_log2[bsize];
5556       nn_features[2] = (float)mi_size_high_log2[bsize];
5557       nn_features[3] = (float)intra_cost;
5558       nn_features[4] = (float)inter_cost;
5559       const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
5560       const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
5561       nn_features[5] = (float)(ac_q_max / ac_q);
5562 
5563       av1_nn_predict(nn_features, nn_config, 1, scores);
5564       aom_clear_system_state();
5565       av1_nn_softmax(scores, probs, 2);
5566 
5567       if (probs[1] > 0.8) search_state.intra_search_state.skip_intra_modes = 1;
5568     } else if ((search_state.best_mbmode.skip_txfm) &&
5569                (sf->intra_sf.skip_intra_in_interframe >= 2)) {
5570       search_state.intra_search_state.skip_intra_modes = 1;
5571     }
5572   }
5573 
5574   const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
5575   search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
5576                                    &sf_args, intra_ref_frame_cost,
5577                                    best_inter_yrd);
5578 #if CONFIG_COLLECT_COMPONENT_TIMING
5579   end_timing(cpi, handle_intra_mode_time);
5580 #endif
5581 
5582   int winner_mode_count =
5583       cpi->sf.winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
5584   // In effect only when fast tx search speed features are enabled.
5585   refine_winner_mode_tx(
5586       cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
5587       &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
5588       search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
5589 
5590   // Initialize default mode evaluation params
5591   set_mode_eval_params(cpi, x, DEFAULT_EVAL);
5592 
5593   // Only try palette mode when the best mode so far is an intra mode.
5594   const int try_palette =
5595       cpi->oxcf.tool_cfg.enable_palette &&
5596       av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
5597       !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
5598   RD_STATS this_rd_cost;
5599   int this_skippable = 0;
5600   if (try_palette) {
5601 #if CONFIG_COLLECT_COMPONENT_TIMING
5602     start_timing(cpi, av1_search_palette_mode_time);
5603 #endif
5604     this_skippable = av1_search_palette_mode(
5605         &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
5606         ctx, &this_rd_cost, search_state.best_rd);
5607 #if CONFIG_COLLECT_COMPONENT_TIMING
5608     end_timing(cpi, av1_search_palette_mode_time);
5609 #endif
5610     if (this_rd_cost.rdcost < search_state.best_rd) {
5611       search_state.best_mode_index = THR_DC;
5612       mbmi->mv[0].as_int = 0;
5613       rd_cost->rate = this_rd_cost.rate;
5614       rd_cost->dist = this_rd_cost.dist;
5615       rd_cost->rdcost = this_rd_cost.rdcost;
5616       search_state.best_rd = rd_cost->rdcost;
5617       search_state.best_mbmode = *mbmi;
5618       search_state.best_skip2 = 0;
5619       search_state.best_mode_skippable = this_skippable;
5620       memcpy(ctx->blk_skip, txfm_info->blk_skip,
5621              sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
5622       av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
5623     }
5624   }
5625 
5626   search_state.best_mbmode.skip_mode = 0;
5627   if (cm->current_frame.skip_mode_info.skip_mode_flag &&
5628       is_comp_ref_allowed(bsize)) {
5629     const struct segmentation *const seg = &cm->seg;
5630     unsigned char segment_id = mbmi->segment_id;
5631     if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
5632       rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
5633     }
5634   }
5635 
5636   // Make sure that the ref_mv_idx is only nonzero when we're
5637   // using a mode which can support ref_mv_idx
5638   if (search_state.best_mbmode.ref_mv_idx != 0 &&
5639       !(search_state.best_mbmode.mode == NEWMV ||
5640         search_state.best_mbmode.mode == NEW_NEWMV ||
5641         have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
5642     search_state.best_mbmode.ref_mv_idx = 0;
5643   }
5644 
5645   if (search_state.best_mode_index == THR_INVALID ||
5646       search_state.best_rd >= best_rd_so_far) {
5647     rd_cost->rate = INT_MAX;
5648     rd_cost->rdcost = INT64_MAX;
5649     return;
5650   }
5651 
5652   const InterpFilter interp_filter = features->interp_filter;
5653   assert((interp_filter == SWITCHABLE) ||
5654          (interp_filter ==
5655           search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
5656          !is_inter_block(&search_state.best_mbmode));
5657   assert((interp_filter == SWITCHABLE) ||
5658          (interp_filter ==
5659           search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
5660          !is_inter_block(&search_state.best_mbmode));
5661 
5662   if (!cpi->rc.is_src_frame_alt_ref && cpi->sf.inter_sf.adaptive_rd_thresh) {
5663     av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
5664                               sf->inter_sf.adaptive_rd_thresh, bsize,
5665                               search_state.best_mode_index);
5666   }
5667 
5668   // macroblock modes
5669   *mbmi = search_state.best_mbmode;
5670   txfm_info->skip_txfm |= search_state.best_skip2;
5671 
5672   // Note: this section is needed since the mode may have been forced to
5673   // GLOBALMV by the all-zero mode handling of ref-mv.
5674   if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
5675     // Correct the interp filters for GLOBALMV
5676     if (is_nontrans_global_motion(xd, xd->mi[0])) {
5677       int_interpfilters filters =
5678           av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
5679       assert(mbmi->interp_filters.as_int == filters.as_int);
5680       (void)filters;
5681     }
5682   }
5683 
5684   for (i = 0; i < REFERENCE_MODES; ++i) {
5685     if (search_state.best_pred_rd[i] == INT64_MAX) {
5686       search_state.best_pred_diff[i] = INT_MIN;
5687     } else {
5688       search_state.best_pred_diff[i] =
5689           search_state.best_rd - search_state.best_pred_rd[i];
5690     }
5691   }
5692 
5693   txfm_info->skip_txfm |= search_state.best_mode_skippable;
5694 
5695   assert(search_state.best_mode_index != THR_INVALID);
5696 
5697 #if CONFIG_INTERNAL_STATS
5698   store_coding_context(x, ctx, search_state.best_mode_index,
5699                        search_state.best_pred_diff,
5700                        search_state.best_mode_skippable);
5701 #else
5702   store_coding_context(x, ctx, search_state.best_pred_diff,
5703                        search_state.best_mode_skippable);
5704 #endif  // CONFIG_INTERNAL_STATS
5705 
5706   if (mbmi->palette_mode_info.palette_size[1] > 0) {
5707     assert(try_palette);
5708     av1_restore_uv_color_map(cpi, x);
5709   }
5710 }
5711 
// Mode decision for a block whose segment has SEG_LVL_SKIP active: no
// residual is coded, so instead of a full mode search the block is forced
// to GLOBALMV with the segment's (or LAST) reference frame, and only the
// interpolation-filter / reference signalling rate is estimated.
// Writes the result into rd_cost; on failure to beat best_rd_so_far,
// rd_cost->rate is set to INT_MAX and rd_cost->rdcost to INT64_MAX.
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
                                        TileDataEnc *tile_data, MACROBLOCK *x,
                                        int mi_row, int mi_col,
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = 0;  // Always single reference in this path.
  int i;
  int64_t best_pred_diff[REFERENCE_MODES];
  unsigned int ref_costs_single[REF_FRAMES];
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
  const ModeCosts *mode_costs = &x->mode_costs;
  const int *comp_inter_cost =
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
  InterpFilter best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  // No residual is coded for skip segments, so distortion is fixed at 0.
  const int64_t distortion2 = 0;
  (void)mi_row;
  (void)mi_col;
  (void)tile_data;

  av1_collect_neighbors_ref_counts(xd);

  // Reference-frame signalling costs for this neighbourhood context.
  estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  // Force the fixed mode: no palette / filter-intra, global motion,
  // simple translation, DC chroma prediction.
  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->mode = GLOBALMV;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->uv_mode = UV_DC_PRED;
  // The reference frame comes from the segment data when SEG_LVL_REF_FRAME
  // is active; otherwise default to LAST_FRAME. Single reference only.
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  else
    mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  // The motion vector is fully determined by the global motion parameters
  // of the chosen reference frame.
  mbmi->mv[0].as_int =
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
                           features->allow_high_precision_mv, bsize, mi_col,
                           mi_row, features->cur_frame_force_integer_mv)
          .as_int;
  mbmi->tx_size = max_txsize_lookup[bsize];
  x->txfm_search_info.skip_txfm = 1;

  mbmi->ref_mv_idx = 0;

  // NOTE(review): motion_mode was already set to SIMPLE_TRANSLATION above;
  // this reassignment is redundant.
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  av1_count_overlappable_neighbors(cm, xd);
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
    // Gather warp projection samples into mbmi->num_proj_ref; presumably
    // consumed by later signalling even though SIMPLE_TRANSLATION is used
    // here — TODO confirm against callers.
    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
    // Select the samples according to motion vector difference
    if (mbmi->num_proj_ref > 1) {
      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
                                             mbmi->num_proj_ref, bsize);
    }
  }

  const InterpFilter interp_filter = features->interp_filter;
  set_default_interp_filters(mbmi, interp_filter);

  if (interp_filter != SWITCHABLE) {
    best_filter = interp_filter;
  } else {
    best_filter = EIGHTTAP_REGULAR;
    // With switchable filters, pick the filter that is cheapest to signal
    // in this context (rate is the only criterion since distortion is 0).
    if (av1_is_interp_needed(xd)) {
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
        rs = av1_get_switchable_rate(x, xd, interp_filter,
                                     cm->seq_params.enable_dual_filter);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = mbmi->interp_filters.as_filters.y_filter;
        }
      }
    }
  }
  // Set the appropriate filter
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
  rate2 += av1_get_switchable_rate(x, xd, interp_filter,
                                   cm->seq_params.enable_dual_filter);

  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
    rate2 += comp_inter_cost[comp_pred];

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  // NOTE(review): this always costs LAST_FRAME signalling even when the
  // segment feature selected a different reference above — confirm intended.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  // Signal failure if even this forced mode cannot beat the caller's bound.
  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((interp_filter == SWITCHABLE) ||
         (interp_filter == mbmi->interp_filters.as_filters.y_filter));

  if (cpi->sf.inter_sf.adaptive_rd_thresh) {
    av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
                              cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
                              THR_GLOBALMV);
  }

  av1_zero(best_pred_diff);

#if CONFIG_INTERNAL_STATS
  store_coding_context(x, ctx, THR_GLOBALMV, best_pred_diff, 0);
#else
  store_coding_context(x, ctx, best_pred_diff, 0);
#endif  // CONFIG_INTERNAL_STATS
}
5845 
5846 /*!\cond */
// Per-neighbour context passed through foreach_overlappable_nb_{above,left}
// to the calc_target_weighted_pred_{above,left} workers.
struct calc_target_weighted_pred_ctxt {
  const OBMCBuffer *obmc_buffer;  // destination wsrc/mask buffers
  const uint8_t *tmp;             // neighbour prediction (above or left)
  int tmp_stride;                 // stride of 'tmp' in pixels
  int overlap;                    // OBMC overlap extent in pixels
};
5853 /*!\endcond */
5854 
calc_target_weighted_pred_above(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * nb_mi,void * fun_ctxt,const int num_planes)5855 static INLINE void calc_target_weighted_pred_above(
5856     MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
5857     int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
5858   (void)nb_mi;
5859   (void)num_planes;
5860   (void)rel_mi_row;
5861   (void)dir;
5862 
5863   struct calc_target_weighted_pred_ctxt *ctxt =
5864       (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
5865 
5866   const int bw = xd->width << MI_SIZE_LOG2;
5867   const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
5868 
5869   int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
5870   int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
5871   const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
5872   const int is_hbd = is_cur_buf_hbd(xd);
5873 
5874   if (!is_hbd) {
5875     for (int row = 0; row < ctxt->overlap; ++row) {
5876       const uint8_t m0 = mask1d[row];
5877       const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
5878       for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
5879         wsrc[col] = m1 * tmp[col];
5880         mask[col] = m0;
5881       }
5882       wsrc += bw;
5883       mask += bw;
5884       tmp += ctxt->tmp_stride;
5885     }
5886   } else {
5887     const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
5888 
5889     for (int row = 0; row < ctxt->overlap; ++row) {
5890       const uint8_t m0 = mask1d[row];
5891       const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
5892       for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
5893         wsrc[col] = m1 * tmp16[col];
5894         mask[col] = m0;
5895       }
5896       wsrc += bw;
5897       mask += bw;
5898       tmp16 += ctxt->tmp_stride;
5899     }
5900   }
5901 }
5902 
calc_target_weighted_pred_left(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * nb_mi,void * fun_ctxt,const int num_planes)5903 static INLINE void calc_target_weighted_pred_left(
5904     MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
5905     int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
5906   (void)nb_mi;
5907   (void)num_planes;
5908   (void)rel_mi_col;
5909   (void)dir;
5910 
5911   struct calc_target_weighted_pred_ctxt *ctxt =
5912       (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
5913 
5914   const int bw = xd->width << MI_SIZE_LOG2;
5915   const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
5916 
5917   int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
5918   int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
5919   const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
5920   const int is_hbd = is_cur_buf_hbd(xd);
5921 
5922   if (!is_hbd) {
5923     for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
5924       for (int col = 0; col < ctxt->overlap; ++col) {
5925         const uint8_t m0 = mask1d[col];
5926         const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
5927         wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
5928                     (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
5929         mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
5930       }
5931       wsrc += bw;
5932       mask += bw;
5933       tmp += ctxt->tmp_stride;
5934     }
5935   } else {
5936     const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
5937 
5938     for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
5939       for (int col = 0; col < ctxt->overlap; ++col) {
5940         const uint8_t m0 = mask1d[col];
5941         const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
5942         wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
5943                     (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
5944         mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
5945       }
5946       wsrc += bw;
5947       mask += bw;
5948       tmp16 += ctxt->tmp_stride;
5949     }
5950   }
5951 }
5952 
5953 // This function has a structure similar to av1_build_obmc_inter_prediction
5954 //
5955 // The OBMC predictor is computed as:
5956 //
5957 //  PObmc(x,y) =
5958 //    AOM_BLEND_A64(Mh(x),
5959 //                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
5960 //                  PLeft(x, y))
5961 //
5962 // Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
5963 // rounding, this can be written as:
5964 //
5965 //  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
5966 //    Mh(x) * Mv(y) * P(x,y) +
5967 //      Mh(x) * Cv(y) * Pabove(x,y) +
5968 //      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
5969 //
5970 // Where :
5971 //
5972 //  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
//  Ch(x) = AOM_BLEND_A64_MAX_ALPHA - Mh(x)
5974 //
5975 // This function computes 'wsrc' and 'mask' as:
5976 //
//  wsrc(x, y) =
//    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
//      (Mh(x) * Cv(y) * Pabove(x,y) +
//       AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y))
5981 //
5982 //  mask(x, y) = Mh(x) * Mv(y)
5983 //
5984 // These can then be used to efficiently approximate the error for any
5985 // predictor P in the context of the provided neighbouring predictors by
5986 // computing:
5987 //
5988 //  error(x, y) =
5989 //    wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
5990 //
// Computes the 'wsrc' and 'mask' buffers described in the comment above:
// wsrc ends up holding the scaled source minus the above/left neighbour
// OBMC contributions, and mask holds the per-pixel self weight, so that a
// candidate predictor's OBMC error can be approximated cheaply.
// 'above'/'left' are the neighbours' predictions with the given strides.
static AOM_INLINE void calc_target_weighted_pred(
    const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
    const uint8_t *above, int above_stride, const uint8_t *left,
    int left_stride) {
  const BLOCK_SIZE bsize = xd->mi[0]->bsize;
  const int bw = xd->width << MI_SIZE_LOG2;   // block width in pixels
  const int bh = xd->height << MI_SIZE_LOG2;  // block height in pixels
  const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
  int32_t *mask_buf = obmc_buffer->mask;
  int32_t *wsrc_buf = obmc_buffer->wsrc;

  const int is_hbd = is_cur_buf_hbd(xd);
  // Total scale applied to the source term: AOM_BLEND_A64_MAX_ALPHA ** 2.
  const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;

  // plane 0 should not be sub-sampled
  assert(xd->plane[0].subsampling_x == 0);
  assert(xd->plane[0].subsampling_y == 0);

  // Start from wsrc = 0 and mask = full self-weight.
  av1_zero_array(wsrc_buf, bw * bh);
  for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;

  // handle above row
  if (xd->up_available) {
    // Overlap extent: half the (capped-at-64) block height.
    const int overlap =
        AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
                                                   above_stride, overlap };
    foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
                                  max_neighbor_obmc[mi_size_wide_log2[bsize]],
                                  calc_target_weighted_pred_above, &ctxt);
  }

  // Scale the above-pass results by MAX_ALPHA before the left pass folds
  // in its own weights (the left worker compensates with a rounding shift).
  for (int i = 0; i < bw * bh; ++i) {
    wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
    mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
  }

  // handle left column
  if (xd->left_available) {
    // Overlap extent: half the (capped-at-64) block width.
    const int overlap =
        AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
    struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
                                                   left_stride, overlap };
    foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
                                 max_neighbor_obmc[mi_size_high_log2[bsize]],
                                 calc_target_weighted_pred_left, &ctxt);
  }

  // Final pass: wsrc = src * MAX_ALPHA^2 - (neighbour contributions).
  if (!is_hbd) {
    const uint8_t *src = x->plane[0].src.buf;

    for (int row = 0; row < bh; ++row) {
      for (int col = 0; col < bw; ++col) {
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
      }
      wsrc_buf += bw;
      src += x->plane[0].src.stride;
    }
  } else {
    const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);

    for (int row = 0; row < bh; ++row) {
      for (int col = 0; col < bw; ++col) {
        wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
      }
      wsrc_buf += bw;
      src += x->plane[0].src.stride;
    }
  }
}
6061 
/* Use standard 3x3 Sobel matrix. Macro so it can be used for either high or
   low bit-depth arrays. (i, j) is the centre pixel; stride is in elements. */
// Horizontal gradient (responds to vertical edges).
#define SOBEL_X(src, stride, i, j)                       \
  ((src)[((i)-1) + (stride) * ((j)-1)] -                 \
   (src)[((i) + 1) + (stride) * ((j)-1)] +  /* NOLINT */ \
   2 * (src)[((i)-1) + (stride) * (j)] -    /* NOLINT */ \
   2 * (src)[((i) + 1) + (stride) * (j)] +  /* NOLINT */ \
   (src)[((i)-1) + (stride) * ((j) + 1)] -  /* NOLINT */ \
   (src)[((i) + 1) + (stride) * ((j) + 1)]) /* NOLINT */
// Vertical gradient (responds to horizontal edges).
#define SOBEL_Y(src, stride, i, j)                       \
  ((src)[((i)-1) + (stride) * ((j)-1)] +                 \
   2 * (src)[(i) + (stride) * ((j)-1)] +    /* NOLINT */ \
   (src)[((i) + 1) + (stride) * ((j)-1)] -  /* NOLINT */ \
   (src)[((i)-1) + (stride) * ((j) + 1)] -  /* NOLINT */ \
   2 * (src)[(i) + (stride) * ((j) + 1)] -  /* NOLINT */ \
   (src)[((i) + 1) + (stride) * ((j) + 1)]) /* NOLINT */
6078 
av1_sobel(const uint8_t * input,int stride,int i,int j,bool high_bd)6079 sobel_xy av1_sobel(const uint8_t *input, int stride, int i, int j,
6080                    bool high_bd) {
6081   int16_t s_x;
6082   int16_t s_y;
6083   if (high_bd) {
6084     const uint16_t *src = CONVERT_TO_SHORTPTR(input);
6085     s_x = SOBEL_X(src, stride, i, j);
6086     s_y = SOBEL_Y(src, stride, i, j);
6087   } else {
6088     s_x = SOBEL_X(input, stride, i, j);
6089     s_y = SOBEL_Y(input, stride, i, j);
6090   }
6091   sobel_xy r = { .x = s_x, .y = s_y };
6092   return r;
6093 }
6094 
// 8-tap Gaussian convolution filter with sigma = 1.3, sums to 128; all
// coefficients must be even. The trailing zero pads the 7-tap kernel out to
// the 8-tap convolution interface.
DECLARE_ALIGNED(16, static const int16_t, gauss_filter[8]) = { 2,  12, 30, 40,
                                                               30, 12, 2,  0 };
6099 
av1_gaussian_blur(const uint8_t * src,int src_stride,int w,int h,uint8_t * dst,bool high_bd,int bd)6100 void av1_gaussian_blur(const uint8_t *src, int src_stride, int w, int h,
6101                        uint8_t *dst, bool high_bd, int bd) {
6102   ConvolveParams conv_params = get_conv_params(0, 0, bd);
6103   InterpFilterParams filter = { .filter_ptr = gauss_filter,
6104                                 .taps = 8,
6105                                 .interp_filter = EIGHTTAP_REGULAR };
6106   // Requirements from the vector-optimized implementations.
6107   assert(h % 4 == 0);
6108   assert(w % 8 == 0);
6109   // Because we use an eight tap filter, the stride should be at least 7 + w.
6110   assert(src_stride >= w + 7);
6111 #if CONFIG_AV1_HIGHBITDEPTH
6112   if (high_bd) {
6113     av1_highbd_convolve_2d_sr(CONVERT_TO_SHORTPTR(src), src_stride,
6114                               CONVERT_TO_SHORTPTR(dst), w, w, h, &filter,
6115                               &filter, 0, 0, &conv_params, bd);
6116   } else {
6117     av1_convolve_2d_sr(src, src_stride, dst, w, w, h, &filter, &filter, 0, 0,
6118                        &conv_params);
6119   }
6120 #else
6121   (void)high_bd;
6122   av1_convolve_2d_sr(src, src_stride, dst, w, w, h, &filter, &filter, 0, 0,
6123                      &conv_params);
6124 #endif
6125 }
6126 
edge_probability(const uint8_t * input,int w,int h,bool high_bd,int bd)6127 static EdgeInfo edge_probability(const uint8_t *input, int w, int h,
6128                                  bool high_bd, int bd) {
6129   // The probability of an edge in the whole image is the same as the highest
6130   // probability of an edge for any individual pixel. Use Sobel as the metric
6131   // for finding an edge.
6132   uint16_t highest = 0;
6133   uint16_t highest_x = 0;
6134   uint16_t highest_y = 0;
6135   // Ignore the 1 pixel border around the image for the computation.
6136   for (int j = 1; j < h - 1; ++j) {
6137     for (int i = 1; i < w - 1; ++i) {
6138       sobel_xy g = av1_sobel(input, w, i, j, high_bd);
6139       // Scale down to 8-bit to get same output regardless of bit depth.
6140       int16_t g_x = g.x >> (bd - 8);
6141       int16_t g_y = g.y >> (bd - 8);
6142       uint16_t magnitude = (uint16_t)sqrt(g_x * g_x + g_y * g_y);
6143       highest = AOMMAX(highest, magnitude);
6144       highest_x = AOMMAX(highest_x, g_x);
6145       highest_y = AOMMAX(highest_y, g_y);
6146     }
6147   }
6148   EdgeInfo ei = { .magnitude = highest, .x = highest_x, .y = highest_y };
6149   return ei;
6150 }
6151 
6152 /* Uses most of the Canny edge detection algorithm to find if there are any
6153  * edges in the image.
6154  */
av1_edge_exists(const uint8_t * src,int src_stride,int w,int h,bool high_bd,int bd)6155 EdgeInfo av1_edge_exists(const uint8_t *src, int src_stride, int w, int h,
6156                          bool high_bd, int bd) {
6157   if (w < 3 || h < 3) {
6158     EdgeInfo n = { .magnitude = 0, .x = 0, .y = 0 };
6159     return n;
6160   }
6161   uint8_t *blurred;
6162   if (high_bd) {
6163     blurred = CONVERT_TO_BYTEPTR(aom_memalign(32, sizeof(uint16_t) * w * h));
6164   } else {
6165     blurred = (uint8_t *)aom_memalign(32, sizeof(uint8_t) * w * h);
6166   }
6167   av1_gaussian_blur(src, src_stride, w, h, blurred, high_bd, bd);
6168   // Skip the non-maximum suppression step in Canny edge detection. We just
6169   // want a probability of an edge existing in the buffer, which is determined
6170   // by the strongest edge in it -- we don't need to eliminate the weaker
6171   // edges. Use Sobel for the edge detection.
6172   EdgeInfo prob = edge_probability(blurred, w, h, high_bd, bd);
6173   if (high_bd) {
6174     aom_free(CONVERT_TO_SHORTPTR(blurred));
6175   } else {
6176     aom_free(blurred);
6177   }
6178   return prob;
6179 }
6180