1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 #include <math.h>
14 #include <stdbool.h>
15 
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/av1_rtcd.h"
19 
20 #include "aom_dsp/aom_dsp_common.h"
21 #include "aom_dsp/blend.h"
22 #include "aom_mem/aom_mem.h"
23 #include "aom_ports/aom_timer.h"
24 #include "aom_ports/mem.h"
25 
26 #include "av1/common/av1_common_int.h"
27 #include "av1/common/cfl.h"
28 #include "av1/common/blockd.h"
29 #include "av1/common/common.h"
30 #include "av1/common/common_data.h"
31 #include "av1/common/entropy.h"
32 #include "av1/common/entropymode.h"
33 #include "av1/common/idct.h"
34 #include "av1/common/mvref_common.h"
35 #include "av1/common/obmc.h"
36 #include "av1/common/pred_common.h"
37 #include "av1/common/quant_common.h"
38 #include "av1/common/reconinter.h"
39 #include "av1/common/reconintra.h"
40 #include "av1/common/scan.h"
41 #include "av1/common/seg_common.h"
42 #include "av1/common/txb_common.h"
43 #include "av1/common/warped_motion.h"
44 
45 #include "av1/encoder/aq_variance.h"
46 #include "av1/encoder/av1_quantize.h"
47 #include "av1/encoder/cost.h"
48 #include "av1/encoder/compound_type.h"
49 #include "av1/encoder/encodemb.h"
50 #include "av1/encoder/encodemv.h"
51 #include "av1/encoder/encoder.h"
52 #include "av1/encoder/encodetxb.h"
53 #include "av1/encoder/hybrid_fwd_txfm.h"
54 #include "av1/encoder/interp_search.h"
55 #include "av1/encoder/intra_mode_search.h"
56 #include "av1/encoder/intra_mode_search_utils.h"
57 #include "av1/encoder/mcomp.h"
58 #include "av1/encoder/ml.h"
59 #include "av1/encoder/mode_prune_model_weights.h"
60 #include "av1/encoder/model_rd.h"
61 #include "av1/encoder/motion_search_facade.h"
62 #include "av1/encoder/palette.h"
63 #include "av1/encoder/pustats.h"
64 #include "av1/encoder/random.h"
65 #include "av1/encoder/ratectrl.h"
66 #include "av1/encoder/rd.h"
67 #include "av1/encoder/rdopt.h"
68 #include "av1/encoder/reconinter_enc.h"
69 #include "av1/encoder/tokenize.h"
70 #include "av1/encoder/tpl_model.h"
71 #include "av1/encoder/tx_search.h"
72 
73 #define LAST_NEW_MV_INDEX 6
74 
75 // Mode_threshold multiplication factor table for prune_inter_modes_if_skippable
76 // The values are kept in Q12 format and equation used to derive is
77 // (2.5 - ((float)x->qindex / MAXQ) * 1.5)
78 #define MODE_THRESH_QBITS 12
// Multiplier table in Q12, indexed by qindex. Values follow the formula in
// the comment above: 2.5 * 4096 = 10240 at qindex 0, decreasing linearly to
// 1.0 * 4096 = 4096 at qindex MAXQ.
static const int mode_threshold_mul_factor[QINDEX_RANGE] = {
  10240, 10216, 10192, 10168, 10144, 10120, 10095, 10071, 10047, 10023, 9999,
  9975,  9951,  9927,  9903,  9879,  9854,  9830,  9806,  9782,  9758,  9734,
  9710,  9686,  9662,  9638,  9614,  9589,  9565,  9541,  9517,  9493,  9469,
  9445,  9421,  9397,  9373,  9349,  9324,  9300,  9276,  9252,  9228,  9204,
  9180,  9156,  9132,  9108,  9083,  9059,  9035,  9011,  8987,  8963,  8939,
  8915,  8891,  8867,  8843,  8818,  8794,  8770,  8746,  8722,  8698,  8674,
  8650,  8626,  8602,  8578,  8553,  8529,  8505,  8481,  8457,  8433,  8409,
  8385,  8361,  8337,  8312,  8288,  8264,  8240,  8216,  8192,  8168,  8144,
  8120,  8096,  8072,  8047,  8023,  7999,  7975,  7951,  7927,  7903,  7879,
  7855,  7831,  7806,  7782,  7758,  7734,  7710,  7686,  7662,  7638,  7614,
  7590,  7566,  7541,  7517,  7493,  7469,  7445,  7421,  7397,  7373,  7349,
  7325,  7301,  7276,  7252,  7228,  7204,  7180,  7156,  7132,  7108,  7084,
  7060,  7035,  7011,  6987,  6963,  6939,  6915,  6891,  6867,  6843,  6819,
  6795,  6770,  6746,  6722,  6698,  6674,  6650,  6626,  6602,  6578,  6554,
  6530,  6505,  6481,  6457,  6433,  6409,  6385,  6361,  6337,  6313,  6289,
  6264,  6240,  6216,  6192,  6168,  6144,  6120,  6096,  6072,  6048,  6024,
  5999,  5975,  5951,  5927,  5903,  5879,  5855,  5831,  5807,  5783,  5758,
  5734,  5710,  5686,  5662,  5638,  5614,  5590,  5566,  5542,  5518,  5493,
  5469,  5445,  5421,  5397,  5373,  5349,  5325,  5301,  5277,  5253,  5228,
  5204,  5180,  5156,  5132,  5108,  5084,  5060,  5036,  5012,  4987,  4963,
  4939,  4915,  4891,  4867,  4843,  4819,  4795,  4771,  4747,  4722,  4698,
  4674,  4650,  4626,  4602,  4578,  4554,  4530,  4506,  4482,  4457,  4433,
  4409,  4385,  4361,  4337,  4313,  4289,  4265,  4241,  4216,  4192,  4168,
  4144,  4120,  4096
};
105 
// Default evaluation order of the MAX_MODES prediction modes: first the
// single-reference groups (NEAREST, NEW, NEAR, GLOBAL — each over all seven
// reference frames), then the compound NEAREST_NEAREST pairs, then per
// reference-frame pair the remaining compound mode group, and finally the
// intra modes.
static const THR_MODES av1_default_mode_order[MAX_MODES] = {
  // Single reference: NEARESTMV for each reference frame.
  THR_NEARESTMV,
  THR_NEARESTL2,
  THR_NEARESTL3,
  THR_NEARESTB,
  THR_NEARESTA2,
  THR_NEARESTA,
  THR_NEARESTG,

  // Single reference: NEWMV.
  THR_NEWMV,
  THR_NEWL2,
  THR_NEWL3,
  THR_NEWB,
  THR_NEWA2,
  THR_NEWA,
  THR_NEWG,

  // Single reference: NEARMV.
  THR_NEARMV,
  THR_NEARL2,
  THR_NEARL3,
  THR_NEARB,
  THR_NEARA2,
  THR_NEARA,
  THR_NEARG,

  // Single reference: GLOBALMV.
  THR_GLOBALMV,
  THR_GLOBALL2,
  THR_GLOBALL3,
  THR_GLOBALB,
  THR_GLOBALA2,
  THR_GLOBALA,
  THR_GLOBALG,

  // Compound NEAREST_NEAREST over each allowed reference-frame pair.
  THR_COMP_NEAREST_NEARESTLA,
  THR_COMP_NEAREST_NEARESTL2A,
  THR_COMP_NEAREST_NEARESTL3A,
  THR_COMP_NEAREST_NEARESTGA,
  THR_COMP_NEAREST_NEARESTLB,
  THR_COMP_NEAREST_NEARESTL2B,
  THR_COMP_NEAREST_NEARESTL3B,
  THR_COMP_NEAREST_NEARESTGB,
  THR_COMP_NEAREST_NEARESTLA2,
  THR_COMP_NEAREST_NEARESTL2A2,
  THR_COMP_NEAREST_NEARESTL3A2,
  THR_COMP_NEAREST_NEARESTGA2,
  THR_COMP_NEAREST_NEARESTLL2,
  THR_COMP_NEAREST_NEARESTLL3,
  THR_COMP_NEAREST_NEARESTLG,
  THR_COMP_NEAREST_NEARESTBA,

  // For every remaining pair, the same seven-mode compound group:
  // NEAR_NEAR, NEW_NEW, NEW_NEAREST, NEAREST_NEW, NEW_NEAR, NEAR_NEW,
  // GLOBAL_GLOBAL.
  THR_COMP_NEAR_NEARLB,
  THR_COMP_NEW_NEWLB,
  THR_COMP_NEW_NEARESTLB,
  THR_COMP_NEAREST_NEWLB,
  THR_COMP_NEW_NEARLB,
  THR_COMP_NEAR_NEWLB,
  THR_COMP_GLOBAL_GLOBALLB,

  THR_COMP_NEAR_NEARLA,
  THR_COMP_NEW_NEWLA,
  THR_COMP_NEW_NEARESTLA,
  THR_COMP_NEAREST_NEWLA,
  THR_COMP_NEW_NEARLA,
  THR_COMP_NEAR_NEWLA,
  THR_COMP_GLOBAL_GLOBALLA,

  THR_COMP_NEAR_NEARL2A,
  THR_COMP_NEW_NEWL2A,
  THR_COMP_NEW_NEARESTL2A,
  THR_COMP_NEAREST_NEWL2A,
  THR_COMP_NEW_NEARL2A,
  THR_COMP_NEAR_NEWL2A,
  THR_COMP_GLOBAL_GLOBALL2A,

  THR_COMP_NEAR_NEARL3A,
  THR_COMP_NEW_NEWL3A,
  THR_COMP_NEW_NEARESTL3A,
  THR_COMP_NEAREST_NEWL3A,
  THR_COMP_NEW_NEARL3A,
  THR_COMP_NEAR_NEWL3A,
  THR_COMP_GLOBAL_GLOBALL3A,

  THR_COMP_NEAR_NEARGA,
  THR_COMP_NEW_NEWGA,
  THR_COMP_NEW_NEARESTGA,
  THR_COMP_NEAREST_NEWGA,
  THR_COMP_NEW_NEARGA,
  THR_COMP_NEAR_NEWGA,
  THR_COMP_GLOBAL_GLOBALGA,

  THR_COMP_NEAR_NEARL2B,
  THR_COMP_NEW_NEWL2B,
  THR_COMP_NEW_NEARESTL2B,
  THR_COMP_NEAREST_NEWL2B,
  THR_COMP_NEW_NEARL2B,
  THR_COMP_NEAR_NEWL2B,
  THR_COMP_GLOBAL_GLOBALL2B,

  THR_COMP_NEAR_NEARL3B,
  THR_COMP_NEW_NEWL3B,
  THR_COMP_NEW_NEARESTL3B,
  THR_COMP_NEAREST_NEWL3B,
  THR_COMP_NEW_NEARL3B,
  THR_COMP_NEAR_NEWL3B,
  THR_COMP_GLOBAL_GLOBALL3B,

  THR_COMP_NEAR_NEARGB,
  THR_COMP_NEW_NEWGB,
  THR_COMP_NEW_NEARESTGB,
  THR_COMP_NEAREST_NEWGB,
  THR_COMP_NEW_NEARGB,
  THR_COMP_NEAR_NEWGB,
  THR_COMP_GLOBAL_GLOBALGB,

  THR_COMP_NEAR_NEARLA2,
  THR_COMP_NEW_NEWLA2,
  THR_COMP_NEW_NEARESTLA2,
  THR_COMP_NEAREST_NEWLA2,
  THR_COMP_NEW_NEARLA2,
  THR_COMP_NEAR_NEWLA2,
  THR_COMP_GLOBAL_GLOBALLA2,

  THR_COMP_NEAR_NEARL2A2,
  THR_COMP_NEW_NEWL2A2,
  THR_COMP_NEW_NEARESTL2A2,
  THR_COMP_NEAREST_NEWL2A2,
  THR_COMP_NEW_NEARL2A2,
  THR_COMP_NEAR_NEWL2A2,
  THR_COMP_GLOBAL_GLOBALL2A2,

  THR_COMP_NEAR_NEARL3A2,
  THR_COMP_NEW_NEWL3A2,
  THR_COMP_NEW_NEARESTL3A2,
  THR_COMP_NEAREST_NEWL3A2,
  THR_COMP_NEW_NEARL3A2,
  THR_COMP_NEAR_NEWL3A2,
  THR_COMP_GLOBAL_GLOBALL3A2,

  THR_COMP_NEAR_NEARGA2,
  THR_COMP_NEW_NEWGA2,
  THR_COMP_NEW_NEARESTGA2,
  THR_COMP_NEAREST_NEWGA2,
  THR_COMP_NEW_NEARGA2,
  THR_COMP_NEAR_NEWGA2,
  THR_COMP_GLOBAL_GLOBALGA2,

  THR_COMP_NEAR_NEARLL2,
  THR_COMP_NEW_NEWLL2,
  THR_COMP_NEW_NEARESTLL2,
  THR_COMP_NEAREST_NEWLL2,
  THR_COMP_NEW_NEARLL2,
  THR_COMP_NEAR_NEWLL2,
  THR_COMP_GLOBAL_GLOBALLL2,

  THR_COMP_NEAR_NEARLL3,
  THR_COMP_NEW_NEWLL3,
  THR_COMP_NEW_NEARESTLL3,
  THR_COMP_NEAREST_NEWLL3,
  THR_COMP_NEW_NEARLL3,
  THR_COMP_NEAR_NEWLL3,
  THR_COMP_GLOBAL_GLOBALLL3,

  THR_COMP_NEAR_NEARLG,
  THR_COMP_NEW_NEWLG,
  THR_COMP_NEW_NEARESTLG,
  THR_COMP_NEAREST_NEWLG,
  THR_COMP_NEW_NEARLG,
  THR_COMP_NEAR_NEWLG,
  THR_COMP_GLOBAL_GLOBALLG,

  THR_COMP_NEAR_NEARBA,
  THR_COMP_NEW_NEWBA,
  THR_COMP_NEW_NEARESTBA,
  THR_COMP_NEAREST_NEWBA,
  THR_COMP_NEW_NEARBA,
  THR_COMP_NEAR_NEWBA,
  THR_COMP_GLOBAL_GLOBALBA,

  // Intra modes, evaluated last.
  THR_DC,
  THR_PAETH,
  THR_SMOOTH,
  THR_SMOOTH_V,
  THR_SMOOTH_H,
  THR_H_PRED,
  THR_V_PRED,
  THR_D135_PRED,
  THR_D203_PRED,
  THR_D157_PRED,
  THR_D67_PRED,
  THR_D113_PRED,
  THR_D45_PRED,
};
298 
299 /*!\cond */
// One recorded result of a single-reference inter-mode evaluation:
// the rd cost, the reference frame it was measured for, and a validity
// flag marking whether the entry has been filled in.
typedef struct SingleInterModeState {
  int64_t rd;                    // rd cost of this mode/ref combination
  MV_REFERENCE_FRAME ref_frame;  // reference frame the rd was measured on
  int valid;                     // nonzero once this entry is populated
} SingleInterModeState;
305 
// Running state of the inter-mode rd search for one block. Fields prefixed
// "best_" track the best candidate encountered so far; the remaining arrays
// cache per-mode / per-reference intermediate results so later candidates
// can reuse or be pruned against them.
typedef struct InterModeSearchState {
  int64_t best_rd;
  int64_t best_skip_rd[2];
  MB_MODE_INFO best_mbmode;
  int best_rate_y;
  int best_rate_uv;
  int best_mode_skippable;
  int best_skip2;
  THR_MODES best_mode_index;
  int num_available_refs;
  int64_t dist_refs[REF_FRAMES];
  int dist_order_refs[REF_FRAMES];
  int64_t mode_threshold[MAX_MODES];
  int64_t best_intra_rd;
  unsigned int best_pred_sse;

  /*!
   * \brief Keep track of best intra rd for use in compound mode.
   */
  int64_t best_pred_rd[REFERENCE_MODES];
  int64_t best_pred_diff[REFERENCE_MODES];
  // Save a set of single_newmv for each checked ref_mv.
  int_mv single_newmv[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_rate[MAX_REF_MV_SEARCH][REF_FRAMES];
  int single_newmv_valid[MAX_REF_MV_SEARCH][REF_FRAMES];
  // Model-estimated rd per [mode][ref_mv index][reference frame].
  int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  // The rd of simple translation in single inter modes
  int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SEARCH][REF_FRAMES];
  int64_t best_single_rd[REF_FRAMES];
  PREDICTION_MODE best_single_mode[REF_FRAMES];

  // Single search results by [directions][modes][reference frames]
  SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  int single_state_cnt[2][SINGLE_INTER_MODE_NUM];
  SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM]
                                            [FWD_REFS];
  int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM];
  MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS];
  IntraModeSearchState intra_search_state;
  RD_STATS best_y_rdcost;
} InterModeSearchState;
347 /*!\endcond */
348 
// Reset every per-block-size inter-mode rd model in this tile to its
// untrained state so that data collection starts from scratch.
void av1_inter_mode_data_init(TileDataEnc *tile_data) {
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    InterModeRdModel *const model = &tile_data->inter_mode_rd_models[bsize];
    model->ready = 0;
    model->num = 0;
    model->dist_sum = 0;
    model->ld_sum = 0;
    model->sse_sum = 0;
    model->sse_sse_sum = 0;
    model->sse_ld_sum = 0;
  }
}
361 
// Estimate the residue rate and distortion for a block of the given size
// from the tile's fitted linear rd model. Returns 1 and fills the outputs
// when the model is trained ("ready"), 0 otherwise.
static int get_est_rate_dist(const TileDataEnc *tile_data, BLOCK_SIZE bsize,
                             int64_t sse, int *est_residue_cost,
                             int64_t *est_dist) {
  const InterModeRdModel *model = &tile_data->inter_mode_rd_models[bsize];
  if (!model->ready) return 0;

  if (sse < model->dist_mean) {
    // Below the mean distortion the model predicts a free residue.
    *est_residue_cost = 0;
    *est_dist = sse;
    return 1;
  }

  *est_dist = (int64_t)round(model->dist_mean);
  const double est_ld = model->a * sse + model->b;
  // Clamp estimated rate cost by INT_MAX / 2.
  // TODO(angiebird@google.com): find better solution than clamping.
  if (fabs(est_ld) < 1e-2) {
    *est_residue_cost = INT_MAX / 2;
  } else {
    const double cost_dbl = (sse - model->dist_mean) / est_ld;
    if (cost_dbl < 0) {
      *est_residue_cost = 0;
    } else {
      *est_residue_cost =
          (int)AOMMIN((int64_t)round(cost_dbl), INT_MAX / 2);
    }
  }
  // A non-positive estimated rate degenerates to "no residue".
  if (*est_residue_cost <= 0) {
    *est_residue_cost = 0;
    *est_dist = sse;
  }
  return 1;
}
395 
// Fit (or refresh) the linear model ld = a * sse + b for each block size
// from the statistics accumulated by inter_mode_data_push(), then reset the
// accumulators for the next collection window.
void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) {
  for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
    const int block_idx = inter_mode_data_block_idx(bsize);
    InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
    // Block sizes without a data slot are never modelled.
    if (block_idx == -1) continue;
    // Require 200 samples for the first fit and 64 for each refresh.
    if ((md->ready == 0 && md->num < 200) || (md->ready == 1 && md->num < 64)) {
      continue;
    } else {
      if (md->ready == 0) {
        // First fit: plain sample means of the accumulated statistics.
        md->dist_mean = md->dist_sum / md->num;
        md->ld_mean = md->ld_sum / md->num;
        md->sse_mean = md->sse_sum / md->num;
        md->sse_sse_mean = md->sse_sse_sum / md->num;
        md->sse_ld_mean = md->sse_ld_sum / md->num;
      } else {
        // Refresh: blend old means with the new window's means, weighting
        // the existing model 3:1 over the new data.
        const double factor = 3;
        md->dist_mean =
            (md->dist_mean * factor + (md->dist_sum / md->num)) / (factor + 1);
        md->ld_mean =
            (md->ld_mean * factor + (md->ld_sum / md->num)) / (factor + 1);
        md->sse_mean =
            (md->sse_mean * factor + (md->sse_sum / md->num)) / (factor + 1);
        md->sse_sse_mean =
            (md->sse_sse_mean * factor + (md->sse_sse_sum / md->num)) /
            (factor + 1);
        md->sse_ld_mean =
            (md->sse_ld_mean * factor + (md->sse_ld_sum / md->num)) /
            (factor + 1);
      }

      // Least-squares line through the moments: slope a and intercept b of
      // ld as a function of sse.
      const double my = md->ld_mean;
      const double mx = md->sse_mean;
      const double dx = sqrt(md->sse_sse_mean);
      const double dxy = md->sse_ld_mean;

      md->a = (dxy - mx * my) / (dx * dx - mx * mx);
      md->b = my - md->a * mx;
      md->ready = 1;

      // Start a fresh accumulation window for the next refresh.
      md->num = 0;
      md->dist_sum = 0;
      md->ld_sum = 0;
      md->sse_sum = 0;
      md->sse_sse_sum = 0;
      md->sse_ld_sum = 0;
    }
    (void)rdmult;
  }
}
445 
inter_mode_data_push(TileDataEnc * tile_data,BLOCK_SIZE bsize,int64_t sse,int64_t dist,int residue_cost)446 static AOM_INLINE void inter_mode_data_push(TileDataEnc *tile_data,
447                                             BLOCK_SIZE bsize, int64_t sse,
448                                             int64_t dist, int residue_cost) {
449   if (residue_cost == 0 || sse == dist) return;
450   const int block_idx = inter_mode_data_block_idx(bsize);
451   if (block_idx == -1) return;
452   InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize];
453   if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) {
454     const double ld = (sse - dist) * 1. / residue_cost;
455     ++rd_model->num;
456     rd_model->dist_sum += dist;
457     rd_model->ld_sum += ld;
458     rd_model->sse_sum += sse;
459     rd_model->sse_sse_sum += (double)sse * (double)sse;
460     rd_model->sse_ld_sum += sse * ld;
461   }
462 }
463 
inter_modes_info_push(InterModesInfo * inter_modes_info,int mode_rate,int64_t sse,int64_t rd,RD_STATS * rd_cost,RD_STATS * rd_cost_y,RD_STATS * rd_cost_uv,const MB_MODE_INFO * mbmi)464 static AOM_INLINE void inter_modes_info_push(InterModesInfo *inter_modes_info,
465                                              int mode_rate, int64_t sse,
466                                              int64_t rd, RD_STATS *rd_cost,
467                                              RD_STATS *rd_cost_y,
468                                              RD_STATS *rd_cost_uv,
469                                              const MB_MODE_INFO *mbmi) {
470   const int num = inter_modes_info->num;
471   assert(num < MAX_INTER_MODES);
472   inter_modes_info->mbmi_arr[num] = *mbmi;
473   inter_modes_info->mode_rate_arr[num] = mode_rate;
474   inter_modes_info->sse_arr[num] = sse;
475   inter_modes_info->est_rd_arr[num] = rd;
476   inter_modes_info->rd_cost_arr[num] = *rd_cost;
477   inter_modes_info->rd_cost_y_arr[num] = *rd_cost_y;
478   inter_modes_info->rd_cost_uv_arr[num] = *rd_cost_uv;
479   ++inter_modes_info->num;
480 }
481 
compare_rd_idx_pair(const void * a,const void * b)482 static int compare_rd_idx_pair(const void *a, const void *b) {
483   if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) {
484     // To avoid inconsistency in qsort() ordering when two elements are equal,
485     // using idx as tie breaker. Refer aomedia:2928
486     if (((RdIdxPair *)a)->idx == ((RdIdxPair *)b)->idx)
487       return 0;
488     else if (((RdIdxPair *)a)->idx > ((RdIdxPair *)b)->idx)
489       return 1;
490     else
491       return -1;
492   } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) {
493     return 1;
494   } else {
495     return -1;
496   }
497 }
498 
inter_modes_info_sort(const InterModesInfo * inter_modes_info,RdIdxPair * rd_idx_pair_arr)499 static AOM_INLINE void inter_modes_info_sort(
500     const InterModesInfo *inter_modes_info, RdIdxPair *rd_idx_pair_arr) {
501   if (inter_modes_info->num == 0) {
502     return;
503   }
504   for (int i = 0; i < inter_modes_info->num; ++i) {
505     rd_idx_pair_arr[i].idx = i;
506     rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i];
507   }
508   qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]),
509         compare_rd_idx_pair);
510 }
511 
512 // Similar to get_horver_correlation, but also takes into account first
513 // row/column, when computing horizontal/vertical correlation.
av1_get_horver_correlation_full_c(const int16_t * diff,int stride,int width,int height,float * hcorr,float * vcorr)514 void av1_get_horver_correlation_full_c(const int16_t *diff, int stride,
515                                        int width, int height, float *hcorr,
516                                        float *vcorr) {
517   // The following notation is used:
518   // x - current pixel
519   // y - left neighbor pixel
520   // z - top neighbor pixel
521   int64_t x_sum = 0, x2_sum = 0, xy_sum = 0, xz_sum = 0;
522   int64_t x_firstrow = 0, x_finalrow = 0, x_firstcol = 0, x_finalcol = 0;
523   int64_t x2_firstrow = 0, x2_finalrow = 0, x2_firstcol = 0, x2_finalcol = 0;
524 
525   // First, process horizontal correlation on just the first row
526   x_sum += diff[0];
527   x2_sum += diff[0] * diff[0];
528   x_firstrow += diff[0];
529   x2_firstrow += diff[0] * diff[0];
530   for (int j = 1; j < width; ++j) {
531     const int16_t x = diff[j];
532     const int16_t y = diff[j - 1];
533     x_sum += x;
534     x_firstrow += x;
535     x2_sum += x * x;
536     x2_firstrow += x * x;
537     xy_sum += x * y;
538   }
539 
540   // Process vertical correlation in the first column
541   x_firstcol += diff[0];
542   x2_firstcol += diff[0] * diff[0];
543   for (int i = 1; i < height; ++i) {
544     const int16_t x = diff[i * stride];
545     const int16_t z = diff[(i - 1) * stride];
546     x_sum += x;
547     x_firstcol += x;
548     x2_sum += x * x;
549     x2_firstcol += x * x;
550     xz_sum += x * z;
551   }
552 
553   // Now process horiz and vert correlation through the rest unit
554   for (int i = 1; i < height; ++i) {
555     for (int j = 1; j < width; ++j) {
556       const int16_t x = diff[i * stride + j];
557       const int16_t y = diff[i * stride + j - 1];
558       const int16_t z = diff[(i - 1) * stride + j];
559       x_sum += x;
560       x2_sum += x * x;
561       xy_sum += x * y;
562       xz_sum += x * z;
563     }
564   }
565 
566   for (int j = 0; j < width; ++j) {
567     x_finalrow += diff[(height - 1) * stride + j];
568     x2_finalrow +=
569         diff[(height - 1) * stride + j] * diff[(height - 1) * stride + j];
570   }
571   for (int i = 0; i < height; ++i) {
572     x_finalcol += diff[i * stride + width - 1];
573     x2_finalcol += diff[i * stride + width - 1] * diff[i * stride + width - 1];
574   }
575 
576   int64_t xhor_sum = x_sum - x_finalcol;
577   int64_t xver_sum = x_sum - x_finalrow;
578   int64_t y_sum = x_sum - x_firstcol;
579   int64_t z_sum = x_sum - x_firstrow;
580   int64_t x2hor_sum = x2_sum - x2_finalcol;
581   int64_t x2ver_sum = x2_sum - x2_finalrow;
582   int64_t y2_sum = x2_sum - x2_firstcol;
583   int64_t z2_sum = x2_sum - x2_firstrow;
584 
585   const float num_hor = (float)(height * (width - 1));
586   const float num_ver = (float)((height - 1) * width);
587 
588   const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor;
589   const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver;
590 
591   const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor;
592   const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver;
593 
594   const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor;
595   const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver;
596 
597   if (xhor_var_n > 0 && y_var_n > 0) {
598     *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n);
599     *hcorr = *hcorr < 0 ? 0 : *hcorr;
600   } else {
601     *hcorr = 1.0;
602   }
603   if (xver_var_n > 0 && z_var_n > 0) {
604     *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n);
605     *vcorr = *vcorr < 0 ? 0 : *vcorr;
606   } else {
607     *vcorr = 1.0;
608   }
609 }
610 
get_sse(const AV1_COMP * cpi,const MACROBLOCK * x,int64_t * sse_y)611 static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
612                        int64_t *sse_y) {
613   const AV1_COMMON *cm = &cpi->common;
614   const int num_planes = av1_num_planes(cm);
615   const MACROBLOCKD *xd = &x->e_mbd;
616   const MB_MODE_INFO *mbmi = xd->mi[0];
617   int64_t total_sse = 0;
618   for (int plane = 0; plane < num_planes; ++plane) {
619     if (plane && !xd->is_chroma_ref) break;
620     const struct macroblock_plane *const p = &x->plane[plane];
621     const struct macroblockd_plane *const pd = &xd->plane[plane];
622     const BLOCK_SIZE bs =
623         get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
624     unsigned int sse;
625 
626     cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
627                             pd->dst.stride, &sse);
628     total_sse += sse;
629     if (!plane && sse_y) *sse_y = sse;
630   }
631   total_sse <<= 4;
632   return total_sse;
633 }
634 
// C reference implementation: returns the sum of squared differences
// between coeff and dqcoeff, and writes the sum of squared coeff values
// to *ssz.
int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                          intptr_t block_size, int64_t *ssz) {
  int64_t error = 0;
  int64_t sqcoeff = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += coeff[i] * coeff[i];
  }

  *ssz = sqcoeff;
  return error;
}
649 
// C reference implementation: sum of squared differences between coeff and
// dqcoeff (low-precision, int16 coefficients).
// Fixed: the per-sample difference of two int16_t values spans
// [-65535, 65535], so its square (up to ~2^32) overflows a 32-bit signed
// int — undefined behavior. The square is now formed in 64-bit arithmetic;
// results are unchanged wherever the original did not overflow.
int64_t av1_block_error_lp_c(const int16_t *coeff, const int16_t *dqcoeff,
                             intptr_t block_size) {
  int64_t error = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
  }

  return error;
}
661 
662 #if CONFIG_AV1_HIGHBITDEPTH
// High-bitdepth C reference of av1_block_error: accumulates the squared
// difference and squared coefficient sums in 64 bits, then rounds and
// shifts both down by 2 * (bd - 8) so results are comparable across bit
// depths. *ssz receives the scaled sum of squared coefficients.
int64_t av1_highbd_block_error_c(const tran_low_t *coeff,
                                 const tran_low_t *dqcoeff, intptr_t block_size,
                                 int64_t *ssz, int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
  int64_t error = 0;
  int64_t sqcoeff = 0;

  for (intptr_t i = 0; i < block_size; i++) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;
    sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i];
  }
  assert(error >= 0 && sqcoeff >= 0);

  error = (error + rounding) >> shift;
  sqcoeff = (sqcoeff + rounding) >> shift;
  *ssz = sqcoeff;
  return error;
}
683 #endif
684 
// Returns 1 when the given diagonal intra mode should be skipped because
// the best intra mode found so far is not one of its two angular
// neighbors; returns 0 otherwise.
static int conditional_skipintra(PREDICTION_MODE mode,
                                 PREDICTION_MODE best_intra_mode) {
  switch (mode) {
    case D113_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D135_PRED;
    case D67_PRED:
      return best_intra_mode != V_PRED && best_intra_mode != D45_PRED;
    case D203_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D45_PRED;
    case D157_PRED:
      return best_intra_mode != H_PRED && best_intra_mode != D135_PRED;
    default: return 0;
  }
}
701 
// Rate cost of signalling the given inter prediction mode under the given
// mode context. Compound modes are costed from a single joint symbol;
// single-reference modes are costed as a cascade of binary decisions:
// NEWMV?, then GLOBALMV?, then NEARESTMV vs NEARMV.
static int cost_mv_ref(const ModeCosts *const mode_costs, PREDICTION_MODE mode,
                       int16_t mode_context) {
  if (is_inter_compound_mode(mode)) {
    return mode_costs
        ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)];
  }

  assert(is_inter_mode(mode));

  const int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
  if (mode == NEWMV) return mode_costs->newmv_mode_cost[newmv_ctx][0];
  int cost = mode_costs->newmv_mode_cost[newmv_ctx][1];

  const int16_t zeromv_ctx =
      (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  if (mode == GLOBALMV) {
    return cost + mode_costs->zeromv_mode_cost[zeromv_ctx][0];
  }
  cost += mode_costs->zeromv_mode_cost[zeromv_ctx][1];

  const int16_t refmv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
  cost += mode_costs->refmv_mode_cost[refmv_ctx][mode != NEARESTMV];
  return cost;
}
732 
get_single_mode(PREDICTION_MODE this_mode,int ref_idx)733 static INLINE PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode,
734                                               int ref_idx) {
735   return ref_idx ? compound_ref1_mode(this_mode)
736                  : compound_ref0_mode(this_mode);
737 }
738 
estimate_ref_frame_costs(const AV1_COMMON * cm,const MACROBLOCKD * xd,const ModeCosts * mode_costs,int segment_id,unsigned int * ref_costs_single,unsigned int (* ref_costs_comp)[REF_FRAMES])739 static AOM_INLINE void estimate_ref_frame_costs(
740     const AV1_COMMON *cm, const MACROBLOCKD *xd, const ModeCosts *mode_costs,
741     int segment_id, unsigned int *ref_costs_single,
742     unsigned int (*ref_costs_comp)[REF_FRAMES]) {
743   int seg_ref_active =
744       segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
745   if (seg_ref_active) {
746     memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single));
747     int ref_frame;
748     for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
749       memset(ref_costs_comp[ref_frame], 0,
750              REF_FRAMES * sizeof((*ref_costs_comp)[0]));
751   } else {
752     int intra_inter_ctx = av1_get_intra_inter_context(xd);
753     ref_costs_single[INTRA_FRAME] =
754         mode_costs->intra_inter_cost[intra_inter_ctx][0];
755     unsigned int base_cost = mode_costs->intra_inter_cost[intra_inter_ctx][1];
756 
757     for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
758       ref_costs_single[i] = base_cost;
759 
760     const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd);
761     const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd);
762     const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd);
763     const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd);
764     const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd);
765     const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd);
766 
767     // Determine cost of a single ref frame, where frame types are represented
768     // by a tree:
769     // Level 0: add cost whether this ref is a forward or backward ref
770     ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
771     ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
772     ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
773     ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][0];
774     ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
775     ref_costs_single[ALTREF2_FRAME] +=
776         mode_costs->single_ref_cost[ctx_p1][0][1];
777     ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p1][0][1];
778 
779     // Level 1: if this ref is forward ref,
780     // add cost whether it is last/last2 or last3/golden
781     ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
782     ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][0];
783     ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
784     ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p3][2][1];
785 
786     // Level 1: if this ref is backward ref
787     // then add cost whether this ref is altref or backward ref
788     ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][0];
789     ref_costs_single[ALTREF2_FRAME] +=
790         mode_costs->single_ref_cost[ctx_p2][1][0];
791     ref_costs_single[ALTREF_FRAME] += mode_costs->single_ref_cost[ctx_p2][1][1];
792 
793     // Level 2: further add cost whether this ref is last or last2
794     ref_costs_single[LAST_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][0];
795     ref_costs_single[LAST2_FRAME] += mode_costs->single_ref_cost[ctx_p4][3][1];
796 
797     // Level 2: last3 or golden
798     ref_costs_single[LAST3_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][0];
799     ref_costs_single[GOLDEN_FRAME] += mode_costs->single_ref_cost[ctx_p5][4][1];
800 
801     // Level 2: bwdref or altref2
802     ref_costs_single[BWDREF_FRAME] += mode_costs->single_ref_cost[ctx_p6][5][0];
803     ref_costs_single[ALTREF2_FRAME] +=
804         mode_costs->single_ref_cost[ctx_p6][5][1];
805 
806     if (cm->current_frame.reference_mode != SINGLE_REFERENCE) {
807       // Similar to single ref, determine cost of compound ref frames.
808       // cost_compound_refs = cost_first_ref + cost_second_ref
809       const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd);
810       const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd);
811       const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd);
812       const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd);
813       const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd);
814 
815       const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd);
816       unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 };
817 
818       ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] =
819           ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] =
820               base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][1];
821       ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0;
822       ref_bicomp_costs[ALTREF_FRAME] = 0;
823 
824       // cost of first ref frame
825       ref_bicomp_costs[LAST_FRAME] +=
826           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
827       ref_bicomp_costs[LAST2_FRAME] +=
828           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][0];
829       ref_bicomp_costs[LAST3_FRAME] +=
830           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
831       ref_bicomp_costs[GOLDEN_FRAME] +=
832           mode_costs->comp_ref_cost[ref_comp_ctx_p][0][1];
833 
834       ref_bicomp_costs[LAST_FRAME] +=
835           mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][0];
836       ref_bicomp_costs[LAST2_FRAME] +=
837           mode_costs->comp_ref_cost[ref_comp_ctx_p1][1][1];
838 
839       ref_bicomp_costs[LAST3_FRAME] +=
840           mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][0];
841       ref_bicomp_costs[GOLDEN_FRAME] +=
842           mode_costs->comp_ref_cost[ref_comp_ctx_p2][2][1];
843 
844       // cost of second ref frame
845       ref_bicomp_costs[BWDREF_FRAME] +=
846           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
847       ref_bicomp_costs[ALTREF2_FRAME] +=
848           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][0];
849       ref_bicomp_costs[ALTREF_FRAME] +=
850           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p][0][1];
851 
852       ref_bicomp_costs[BWDREF_FRAME] +=
853           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0];
854       ref_bicomp_costs[ALTREF2_FRAME] +=
855           mode_costs->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1];
856 
857       // cost: if one ref frame is forward ref, the other ref is backward ref
858       int ref0, ref1;
859       for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
860         for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) {
861           ref_costs_comp[ref0][ref1] =
862               ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1];
863         }
864       }
865 
866       // cost: if both ref frames are the same side.
867       const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd);
868       const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd);
869       const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd);
870       ref_costs_comp[LAST_FRAME][LAST2_FRAME] =
871           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
872           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
873           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0];
874       ref_costs_comp[LAST_FRAME][LAST3_FRAME] =
875           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
876           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
877           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
878           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0];
879       ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] =
880           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
881           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] +
882           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] +
883           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1];
884       ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] =
885           base_cost + mode_costs->comp_ref_type_cost[comp_ref_type_ctx][0] +
886           mode_costs->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1];
887     } else {
888       int ref0, ref1;
889       for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) {
890         for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1)
891           ref_costs_comp[ref0][ref1] = 512;
892       }
893       ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512;
894       ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512;
895       ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512;
896       ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512;
897     }
898   }
899 }
900 
store_coding_context(MACROBLOCK * x,PICK_MODE_CONTEXT * ctx,int mode_index,int64_t comp_pred_diff[REFERENCE_MODES],int skippable)901 static AOM_INLINE void store_coding_context(
902 #if CONFIG_INTERNAL_STATS
903     MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index,
904 #else
905     MACROBLOCK *x, PICK_MODE_CONTEXT *ctx,
906 #endif  // CONFIG_INTERNAL_STATS
907     int64_t comp_pred_diff[REFERENCE_MODES], int skippable) {
908   MACROBLOCKD *const xd = &x->e_mbd;
909 
910   // Take a snapshot of the coding context so it can be
911   // restored if we decide to encode this way
912   ctx->rd_stats.skip_txfm = x->txfm_search_info.skip_txfm;
913   ctx->skippable = skippable;
914 #if CONFIG_INTERNAL_STATS
915   ctx->best_mode_index = mode_index;
916 #endif  // CONFIG_INTERNAL_STATS
917   ctx->mic = *xd->mi[0];
918   av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
919                                       av1_ref_frame_type(xd->mi[0]->ref_frame));
920   ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE];
921   ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE];
922   ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT];
923 }
924 
setup_buffer_ref_mvs_inter(const AV1_COMP * const cpi,MACROBLOCK * x,MV_REFERENCE_FRAME ref_frame,BLOCK_SIZE block_size,struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE])925 static AOM_INLINE void setup_buffer_ref_mvs_inter(
926     const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame,
927     BLOCK_SIZE block_size, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
928   const AV1_COMMON *cm = &cpi->common;
929   const int num_planes = av1_num_planes(cm);
930   const YV12_BUFFER_CONFIG *scaled_ref_frame =
931       av1_get_scaled_ref_frame(cpi, ref_frame);
932   MACROBLOCKD *const xd = &x->e_mbd;
933   MB_MODE_INFO *const mbmi = xd->mi[0];
934   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
935   const struct scale_factors *const sf =
936       get_ref_scale_factors_const(cm, ref_frame);
937   const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, ref_frame);
938   assert(yv12 != NULL);
939 
940   if (scaled_ref_frame) {
941     // Setup pred block based on scaled reference, because av1_mv_pred() doesn't
942     // support scaling.
943     av1_setup_pred_block(xd, yv12_mb[ref_frame], scaled_ref_frame, NULL, NULL,
944                          num_planes);
945   } else {
946     av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
947   }
948 
949   // Gets an initial list of candidate vectors from neighbours and orders them
950   av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
951                    xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
952                    mbmi_ext->mode_context);
953   // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
954   // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
955   av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
956   // Further refinement that is encode side only to test the top few candidates
957   // in full and choose the best as the center point for subsequent searches.
958   // The current implementation doesn't support scaling.
959   av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12_mb[ref_frame][0].stride,
960               ref_frame, block_size);
961 
962   // Go back to unscaled reference.
963   if (scaled_ref_frame) {
964     // We had temporarily setup pred block based on scaled reference above. Go
965     // back to unscaled reference now, for subsequent use.
966     av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, sf, sf, num_planes);
967   }
968 }
969 
970 #define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
971 #define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3)
972 
973 // TODO(jingning): this mv clamping function should be block size dependent.
clamp_mv2(MV * mv,const MACROBLOCKD * xd)974 static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
975   const SubpelMvLimits mv_limits = { xd->mb_to_left_edge - LEFT_TOP_MARGIN,
976                                      xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
977                                      xd->mb_to_top_edge - LEFT_TOP_MARGIN,
978                                      xd->mb_to_bottom_edge +
979                                          RIGHT_BOTTOM_MARGIN };
980   clamp_mv(mv, &mv_limits);
981 }
982 
983 /* If the current mode shares the same mv with other modes with higher cost,
984  * skip this mode. */
static int skip_repeated_mv(const AV1_COMMON *const cm,
                            const MACROBLOCK *const x,
                            PREDICTION_MODE this_mode,
                            const MV_REFERENCE_FRAME ref_frames[2],
                            InterModeSearchState *search_state) {
  // Only single-reference modes are deduplicated here.
  if (ref_frames[1] > INTRA_FRAME) return 0;

  const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames);
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  const bool gm_translational =
      cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION;

  // Find an already-searched mode that produces the same motion vector as
  // this_mode under the current candidate-list configuration.
  PREDICTION_MODE compare_mode = MB_MODE_COUNT;
  if (this_mode == NEARMV) {
    if (ref_mv_count == 0) {
      // NEARMV has the same motion vector as NEARESTMV.
      compare_mode = NEARESTMV;
    } else if (ref_mv_count == 1 && gm_translational) {
      // NEARMV has the same motion vector as GLOBALMV.
      compare_mode = GLOBALMV;
    }
  } else if (this_mode == GLOBALMV) {
    if (ref_mv_count == 0 && gm_translational) {
      // GLOBALMV has the same motion vector as NEARESTMV.
      compare_mode = NEARESTMV;
    } else if (ref_mv_count == 1) {
      // GLOBALMV has the same motion vector as NEARMV.
      compare_mode = NEARMV;
    }
  }
  if (compare_mode == MB_MODE_COUNT) return 0;

  // A modelled_rd entry of INT64_MAX means compare_mode was never searched.
  const int64_t compare_rd =
      search_state->modelled_rd[compare_mode][0][ref_frames[0]];
  if (compare_rd == INT64_MAX) return 0;

  const int16_t mode_ctx =
      av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames);
  const int compare_cost = cost_mv_ref(&x->mode_costs, compare_mode, mode_ctx);
  const int this_cost = cost_mv_ref(&x->mode_costs, this_mode, mode_ctx);

  // Skip only when this mode is strictly more expensive to signal than the
  // equivalent mode that was already searched.
  if (this_cost > compare_cost) {
    search_state->modelled_rd[this_mode][0][ref_frames[0]] = compare_rd;
    return 1;
  }
  return 0;
}
1040 
clamp_and_check_mv(int_mv * out_mv,int_mv in_mv,const AV1_COMMON * cm,const MACROBLOCK * x)1041 static INLINE int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv,
1042                                      const AV1_COMMON *cm,
1043                                      const MACROBLOCK *x) {
1044   const MACROBLOCKD *const xd = &x->e_mbd;
1045   *out_mv = in_mv;
1046   lower_mv_precision(&out_mv->as_mv, cm->features.allow_high_precision_mv,
1047                      cm->features.cur_frame_force_integer_mv);
1048   clamp_mv2(&out_mv->as_mv, xd);
1049   return av1_is_fullmv_in_range(&x->mv_limits,
1050                                 get_fullmv_from_mv(&out_mv->as_mv));
1051 }
1052 
1053 // To use single newmv directly for compound modes, need to clamp the mv to the
1054 // valid mv range. Without this, encoder would generate out of range mv, and
1055 // this is seen in 8k encoding.
clamp_mv_in_range(MACROBLOCK * const x,int_mv * mv,int ref_idx)1056 static INLINE void clamp_mv_in_range(MACROBLOCK *const x, int_mv *mv,
1057                                      int ref_idx) {
1058   const int_mv ref_mv = av1_get_ref_mv(x, ref_idx);
1059   SubpelMvLimits mv_limits;
1060 
1061   av1_set_subpel_mv_search_range(&mv_limits, &x->mv_limits, &ref_mv.as_mv);
1062   clamp_mv(&mv->as_mv, &mv_limits);
1063 }
1064 
/*!\brief Determines the NEWMV motion vector(s) and their signaling rate.
 *
 * For compound modes, reuses the cached single-reference NEWMV results in
 * \p args (clamped into the valid range) for whichever side(s) of the mode
 * require a NEWMV, and recomputes *rate_mv accordingly. For single-reference
 * NEWMV, runs a motion search (possibly with a reduced range per the
 * reduce_search_range speed feature) and caches the result in \p args so
 * that later compound modes can reuse it.
 *
 * \return 0 on success, or INT64_MAX if the search failed or mode_info marks
 *         this ref_mv_idx as skipped.
 */
static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
                            const BLOCK_SIZE bsize, int_mv *cur_mv,
                            int *const rate_mv, HandleInterModeArgs *const args,
                            inter_mode_info *mode_info) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  const int is_comp_pred = has_second_ref(mbmi);
  const PREDICTION_MODE this_mode = mbmi->mode;
  // ref_frame[1] is negative for single prediction; map it to 0 so it can
  // safely index the single_newmv caches below.
  const int refs[2] = { mbmi->ref_frame[0],
                        mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] };
  const int ref_mv_idx = mbmi->ref_mv_idx;

  if (is_comp_pred) {
    const int valid_mv0 = args->single_newmv_valid[ref_mv_idx][refs[0]];
    const int valid_mv1 = args->single_newmv_valid[ref_mv_idx][refs[1]];
    if (this_mode == NEW_NEWMV) {
      // Both sides are NEWMV: take each cached single-ref result if present.
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      // Rate is the sum of the bit costs of both MVs.
      *rate_mv = 0;
      for (int i = 0; i < 2; ++i) {
        const int_mv ref_mv = av1_get_ref_mv(x, i);
        *rate_mv += av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv,
                                    x->mv_costs->nmv_joint_cost,
                                    x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
      }
    } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) {
      // Only the second reference carries a NEWMV.
      if (valid_mv1) {
        cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int;
        clamp_mv_in_range(x, &cur_mv[1], 1);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 1);
      *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    } else {
      // Only the first reference carries a NEWMV.
      assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV);
      if (valid_mv0) {
        cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int;
        clamp_mv_in_range(x, &cur_mv[0], 0);
      }
      const int_mv ref_mv = av1_get_ref_mv(x, 0);
      *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv,
                                 x->mv_costs->nmv_joint_cost,
                                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
    }
  } else {
    // Single ref case.
    const int ref_idx = 0;
    int search_range = INT_MAX;

    if (cpi->sf.mv_sf.reduce_search_range && mbmi->ref_mv_idx > 0) {
      // Speed feature: bound the search range using the ref_mv candidate
      // closest to the current one and its cached NEWMV result.
      const MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv;
      int min_mv_diff = INT_MAX;
      int best_match = -1;
      MV prev_ref_mv[2] = { { 0 } };
      for (int idx = 0; idx < mbmi->ref_mv_idx; ++idx) {
        prev_ref_mv[idx] = av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame,
                                                     idx, &x->mbmi_ext)
                               .as_mv;
        // Chebyshev (max of row/col) distance between the reference MVs.
        const int ref_mv_diff = AOMMAX(abs(ref_mv.row - prev_ref_mv[idx].row),
                                       abs(ref_mv.col - prev_ref_mv[idx].col));

        if (min_mv_diff > ref_mv_diff) {
          min_mv_diff = ref_mv_diff;
          best_match = idx;
        }
      }

      // Only restrict the range when a candidate is within 16 pels (the
      // threshold is in 1/8-pel units).
      if (min_mv_diff < (16 << 3)) {
        if (args->single_newmv_valid[best_match][refs[0]]) {
          search_range = min_mv_diff;
          search_range +=
              AOMMAX(abs(args->single_newmv[best_match][refs[0]].as_mv.row -
                         prev_ref_mv[best_match].row),
                     abs(args->single_newmv[best_match][refs[0]].as_mv.col -
                         prev_ref_mv[best_match].col));
          // Get full pixel search range.
          search_range = (search_range + 4) >> 3;
        }
      }
    }

    int_mv best_mv;
    av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
                             mode_info, &best_mv, args);
    if (best_mv.as_int == INVALID_MV) return INT64_MAX;

    // Cache the result so compound modes can reuse it later.
    args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
    args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
    args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
    cur_mv[0].as_int = best_mv.as_int;

    // Return after single_newmv is set.
    if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
  }

  return 0;
}
1169 
update_mode_start_end_index(const AV1_COMP * const cpi,int * mode_index_start,int * mode_index_end,int last_motion_mode_allowed,int interintra_allowed,int eval_motion_mode)1170 static INLINE void update_mode_start_end_index(const AV1_COMP *const cpi,
1171                                                int *mode_index_start,
1172                                                int *mode_index_end,
1173                                                int last_motion_mode_allowed,
1174                                                int interintra_allowed,
1175                                                int eval_motion_mode) {
1176   *mode_index_start = (int)SIMPLE_TRANSLATION;
1177   *mode_index_end = (int)last_motion_mode_allowed + interintra_allowed;
1178   if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
1179     if (!eval_motion_mode) {
1180       *mode_index_end = (int)SIMPLE_TRANSLATION;
1181     } else {
1182       // Set the start index appropriately to process motion modes other than
1183       // simple translation
1184       *mode_index_start = 1;
1185     }
1186   }
1187 }
1188 
1189 /*!\brief AV1 motion mode search
1190  *
1191  * \ingroup inter_mode_search
1192  * Function to search over and determine the motion mode. It will update
1193  * mbmi->motion_mode to one of SIMPLE_TRANSLATION, OBMC_CAUSAL, or
1194  * WARPED_CAUSAL and determine any necessary side information for the selected
1195  * motion mode. It will also perform the full transform search, unless the
1196  * input parameter do_tx_search indicates to do an estimation of the RD rather
1197  * than an RD corresponding to a full transform search. It will return the
1198  * RD for the final motion_mode.
1199  * Do the RD search for a given inter mode and compute all information relevant
1200  * to the input mode. It will compute the best MV,
1201  * compound parameters (if the mode is a compound mode) and interpolation filter
1202  * parameters.
1203  *
1204  * \param[in]     cpi               Top-level encoder structure.
1205  * \param[in]     tile_data         Pointer to struct holding adaptive
1206  *                                  data/contexts/models for the tile during
1207  *                                  encoding.
1208  * \param[in]     x                 Pointer to struct holding all the data for
1209  *                                  the current macroblock.
1210  * \param[in]     bsize             Current block size.
1211  * \param[in,out] rd_stats          Struct to keep track of the overall RD
1212  *                                  information.
1213  * \param[in,out] rd_stats_y        Struct to keep track of the RD information
1214  *                                  for only the Y plane.
1215  * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
1216  *                                  for only the UV planes.
1217  * \param[in]     args              HandleInterModeArgs struct holding
1218  *                                  miscellaneous arguments for inter mode
1219  *                                  search. See the documentation for this
1220  *                                  struct for a description of each member.
1221  * \param[in]     ref_best_rd       Best RD found so far for this block.
1222  *                                  It is used for early termination of this
1223  *                                  search if the RD exceeds this value.
1224  * \param[in,out] ref_skip_rd       A length 2 array, where skip_rd[0] is the
1225  *                                  best total RD for a skip mode so far, and
1226  *                                  skip_rd[1] is the best RD for a skip mode so
1227  *                                  far in luma. This is used as a speed feature
1228  *                                  to skip the transform search if the computed
1229  *                                  skip RD for the current mode is not better
1230  *                                  than the best skip_rd so far.
1231  * \param[in,out] rate_mv           The rate associated with the motion vectors.
1232  *                                  This will be modified if a motion search is
1233  *                                  done in the motion mode search.
1234  * \param[in,out] orig_dst          A prediction buffer to hold a computed
1235  *                                  prediction. This will eventually hold the
1236  *                                  final prediction, and the tmp_dst info will
1237  *                                  be copied here.
1238  * \param[in,out] best_est_rd       Estimated RD for motion mode search if
1239  *                                  do_tx_search (see below) is 0.
1240  * \param[in]     do_tx_search      Parameter to indicate whether or not to do
1241  *                                  a full transform search. This will compute
1242  *                                  an estimated RD for the modes without the
1243  *                                  transform search and later perform the full
1244  *                                  transform search on the best candidates.
1245  * \param[in]     inter_modes_info  InterModesInfo struct to hold inter mode
1246  *                                  information to perform a full transform
1247  *                                  search only on winning candidates searched
1248  *                                  with an estimate for transform coding RD.
 * \param[in]     eval_motion_mode  Boolean whether or not to evaluate
 *                                  motion modes other than SIMPLE_TRANSLATION.
1251  * \param[out]    yrd               Stores the rdcost corresponding to encoding
1252  *                                  the luma plane.
1253  * \return Returns INT64_MAX if the determined motion mode is invalid and the
1254  * current motion mode being tested should be skipped. It returns 0 if the
1255  * motion mode search is a success.
1256  */
motion_mode_rd(const AV1_COMP * const cpi,TileDataEnc * tile_data,MACROBLOCK * const x,BLOCK_SIZE bsize,RD_STATS * rd_stats,RD_STATS * rd_stats_y,RD_STATS * rd_stats_uv,HandleInterModeArgs * const args,int64_t ref_best_rd,int64_t * ref_skip_rd,int * rate_mv,const BUFFER_SET * orig_dst,int64_t * best_est_rd,int do_tx_search,InterModesInfo * inter_modes_info,int eval_motion_mode,int64_t * yrd)1257 static int64_t motion_mode_rd(
1258     const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x,
1259     BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
1260     RD_STATS *rd_stats_uv, HandleInterModeArgs *const args, int64_t ref_best_rd,
1261     int64_t *ref_skip_rd, int *rate_mv, const BUFFER_SET *orig_dst,
1262     int64_t *best_est_rd, int do_tx_search, InterModesInfo *inter_modes_info,
1263     int eval_motion_mode, int64_t *yrd) {
1264   const AV1_COMMON *const cm = &cpi->common;
1265   const FeatureFlags *const features = &cm->features;
1266   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
1267   const int num_planes = av1_num_planes(cm);
1268   MACROBLOCKD *xd = &x->e_mbd;
1269   MB_MODE_INFO *mbmi = xd->mi[0];
1270   const int is_comp_pred = has_second_ref(mbmi);
1271   const PREDICTION_MODE this_mode = mbmi->mode;
1272   const int rate2_nocoeff = rd_stats->rate;
1273   int best_xskip_txfm = 0;
1274   RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
1275   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
1276   uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
1277   const int rate_mv0 = *rate_mv;
1278   const int interintra_allowed = cm->seq_params->enable_interintra_compound &&
1279                                  is_interintra_allowed(mbmi) &&
1280                                  mbmi->compound_idx;
1281   WARP_SAMPLE_INFO *const warp_sample_info =
1282       &x->warp_sample_info[mbmi->ref_frame[0]];
1283   int *pts0 = warp_sample_info->pts;
1284   int *pts_inref0 = warp_sample_info->pts_inref;
1285 
1286   assert(mbmi->ref_frame[1] != INTRA_FRAME);
1287   const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1];
1288   av1_invalid_rd_stats(&best_rd_stats);
1289   mbmi->num_proj_ref = 1;  // assume num_proj_ref >=1
1290   MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION;
1291   *yrd = INT64_MAX;
1292   if (features->switchable_motion_mode) {
1293     // Determine which motion modes to search if more than SIMPLE_TRANSLATION
1294     // is allowed.
1295     last_motion_mode_allowed = motion_mode_allowed(
1296         xd->global_motion, xd, mbmi, features->allow_warped_motion);
1297   }
1298 
1299   if (last_motion_mode_allowed == WARPED_CAUSAL) {
1300     // Collect projection samples used in least squares approximation of
1301     // the warped motion parameters if WARPED_CAUSAL is going to be searched.
1302     if (warp_sample_info->num < 0) {
1303       warp_sample_info->num = av1_findSamples(cm, xd, pts0, pts_inref0);
1304     }
1305     mbmi->num_proj_ref = warp_sample_info->num;
1306   }
1307   const int total_samples = mbmi->num_proj_ref;
1308   if (total_samples == 0) {
1309     // Do not search WARPED_CAUSAL if there are no samples to use to determine
1310     // warped parameters.
1311     last_motion_mode_allowed = OBMC_CAUSAL;
1312   }
1313 
1314   const MB_MODE_INFO base_mbmi = *mbmi;
1315   MB_MODE_INFO best_mbmi;
1316   const int interp_filter = features->interp_filter;
1317   const int switchable_rate =
1318       av1_is_interp_needed(xd)
1319           ? av1_get_switchable_rate(x, xd, interp_filter,
1320                                     cm->seq_params->enable_dual_filter)
1321           : 0;
1322   int64_t best_rd = INT64_MAX;
1323   int best_rate_mv = rate_mv0;
1324   const int mi_row = xd->mi_row;
1325   const int mi_col = xd->mi_col;
1326   int mode_index_start, mode_index_end;
1327   // Modify the start and end index according to speed features. For example,
1328   // if SIMPLE_TRANSLATION has already been searched according to
1329   // the motion_mode_for_winner_cand speed feature, update the mode_index_start
1330   // to avoid searching it again.
1331   update_mode_start_end_index(cpi, &mode_index_start, &mode_index_end,
1332                               last_motion_mode_allowed, interintra_allowed,
1333                               eval_motion_mode);
1334   // Main function loop. This loops over all of the possible motion modes and
1335   // computes RD to determine the best one. This process includes computing
1336   // any necessary side information for the motion mode and performing the
1337   // transform search.
1338   for (int mode_index = mode_index_start; mode_index <= mode_index_end;
1339        mode_index++) {
1340     if (args->skip_motion_mode && mode_index) continue;
1341     int tmp_rate2 = rate2_nocoeff;
1342     const int is_interintra_mode = mode_index > (int)last_motion_mode_allowed;
1343     int tmp_rate_mv = rate_mv0;
1344 
1345     *mbmi = base_mbmi;
1346     if (is_interintra_mode) {
1347       // Only use SIMPLE_TRANSLATION for interintra
1348       mbmi->motion_mode = SIMPLE_TRANSLATION;
1349     } else {
1350       mbmi->motion_mode = (MOTION_MODE)mode_index;
1351       assert(mbmi->ref_frame[1] != INTRA_FRAME);
1352     }
1353 
1354     // Do not search OBMC if the probability of selecting it is below a
1355     // predetermined threshold for this update_type and block size.
1356     const FRAME_UPDATE_TYPE update_type =
1357         get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
1358     const int prune_obmc =
1359         cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
1360         cpi->sf.inter_sf.prune_obmc_prob_thresh;
1361     if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) &&
1362         mbmi->motion_mode == OBMC_CAUSAL)
1363       continue;
1364 
1365     if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) {
1366       // SIMPLE_TRANSLATION mode: no need to recalculate.
1367       // The prediction is calculated before motion_mode_rd() is called in
1368       // handle_inter_mode()
1369     } else if (mbmi->motion_mode == OBMC_CAUSAL) {
1370       const uint32_t cur_mv = mbmi->mv[0].as_int;
1371       // OBMC_CAUSAL not allowed for compound prediction
1372       assert(!is_comp_pred);
1373       if (have_newmv_in_inter_mode(this_mode)) {
1374         av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
1375                                  &mbmi->mv[0], NULL);
1376         tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
1377       }
1378       if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
1379         // Build the predictor according to the current motion vector if it has
1380         // not already been built
1381         av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
1382                                       0, av1_num_planes(cm) - 1);
1383       }
1384       // Build the inter predictor by blending the predictor corresponding to
1385       // this MV, and the neighboring blocks using the OBMC model
1386       av1_build_obmc_inter_prediction(
1387           cm, xd, args->above_pred_buf, args->above_pred_stride,
1388           args->left_pred_buf, args->left_pred_stride);
1389 #if !CONFIG_REALTIME_ONLY
1390     } else if (mbmi->motion_mode == WARPED_CAUSAL) {
1391       int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
1392       mbmi->motion_mode = WARPED_CAUSAL;
1393       mbmi->wm_params.wmtype = DEFAULT_WMTYPE;
1394       mbmi->interp_filters =
1395           av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
1396 
1397       memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0));
1398       memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0));
1399       // Select the samples according to motion vector difference
1400       if (mbmi->num_proj_ref > 1) {
1401         mbmi->num_proj_ref = av1_selectSamples(
1402             &mbmi->mv[0].as_mv, pts, pts_inref, mbmi->num_proj_ref, bsize);
1403       }
1404 
1405       // Compute the warped motion parameters with a least squares fit
1406       //  using the collected samples
1407       if (!av1_find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize,
1408                                mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col,
1409                                &mbmi->wm_params, mi_row, mi_col)) {
1410         assert(!is_comp_pred);
1411         if (have_newmv_in_inter_mode(this_mode)) {
1412           // Refine MV for NEWMV mode
1413           const int_mv mv0 = mbmi->mv[0];
1414           const WarpedMotionParams wm_params0 = mbmi->wm_params;
1415           const int num_proj_ref0 = mbmi->num_proj_ref;
1416 
1417           const int_mv ref_mv = av1_get_ref_mv(x, 0);
1418           SUBPEL_MOTION_SEARCH_PARAMS ms_params;
1419           av1_make_default_subpel_ms_params(&ms_params, cpi, x, bsize,
1420                                             &ref_mv.as_mv, NULL);
1421 
1422           // Refine MV in a small range.
1423           av1_refine_warped_mv(xd, cm, &ms_params, bsize, pts0, pts_inref0,
1424                                total_samples);
1425 
1426           if (mv0.as_int != mbmi->mv[0].as_int) {
1427             // Keep the refined MV and WM parameters.
1428             tmp_rate_mv = av1_mv_bit_cost(
1429                 &mbmi->mv[0].as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
1430                 x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
1431             tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
1432           } else {
1433             // Restore the old MV and WM parameters.
1434             mbmi->mv[0] = mv0;
1435             mbmi->wm_params = wm_params0;
1436             mbmi->num_proj_ref = num_proj_ref0;
1437           }
1438         }
1439 
1440         // Build the warped predictor
1441         av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
1442                                       av1_num_planes(cm) - 1);
1443       } else {
1444         continue;
1445       }
1446 #endif  // !CONFIG_REALTIME_ONLY
1447     } else if (is_interintra_mode) {
1448       const int ret =
1449           av1_handle_inter_intra_mode(cpi, x, bsize, mbmi, args, ref_best_rd,
1450                                       &tmp_rate_mv, &tmp_rate2, orig_dst);
1451       if (ret < 0) continue;
1452     }
1453 
1454     // If we are searching newmv and the mv is the same as refmv, skip the
1455     // current mode
1456     if (!av1_check_newmv_joint_nonzero(cm, x)) continue;
1457 
1458     // Update rd_stats for the current motion mode
1459     txfm_info->skip_txfm = 0;
1460     rd_stats->dist = 0;
1461     rd_stats->sse = 0;
1462     rd_stats->skip_txfm = 1;
1463     rd_stats->rate = tmp_rate2;
1464     const ModeCosts *mode_costs = &x->mode_costs;
1465     if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate;
1466     if (interintra_allowed) {
1467       rd_stats->rate +=
1468           mode_costs->interintra_cost[size_group_lookup[bsize]]
1469                                      [mbmi->ref_frame[1] == INTRA_FRAME];
1470     }
1471     if ((last_motion_mode_allowed > SIMPLE_TRANSLATION) &&
1472         (mbmi->ref_frame[1] != INTRA_FRAME)) {
1473       if (last_motion_mode_allowed == WARPED_CAUSAL) {
1474         rd_stats->rate +=
1475             mode_costs->motion_mode_cost[bsize][mbmi->motion_mode];
1476       } else {
1477         rd_stats->rate +=
1478             mode_costs->motion_mode_cost1[bsize][mbmi->motion_mode];
1479       }
1480     }
1481 
1482     int64_t this_yrd = INT64_MAX;
1483 
1484     if (!do_tx_search) {
1485       // Avoid doing a transform search here to speed up the overall mode
1486       // search. It will be done later in the mode search if the current
1487       // motion mode seems promising.
1488       int64_t curr_sse = -1;
1489       int64_t sse_y = -1;
1490       int est_residue_cost = 0;
1491       int64_t est_dist = 0;
1492       int64_t est_rd = 0;
1493       if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
1494         curr_sse = get_sse(cpi, x, &sse_y);
1495         const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse,
1496                                                  &est_residue_cost, &est_dist);
1497         (void)has_est_rd;
1498         assert(has_est_rd);
1499       } else if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 ||
1500                  cpi->sf.rt_sf.use_nonrd_pick_mode) {
1501         model_rd_sb_fn[MODELRD_TYPE_MOTION_MODE_RD](
1502             cpi, bsize, x, xd, 0, num_planes - 1, &est_residue_cost, &est_dist,
1503             NULL, &curr_sse, NULL, NULL, NULL);
1504         sse_y = x->pred_sse[xd->mi[0]->ref_frame[0]];
1505       }
1506       est_rd = RDCOST(x->rdmult, rd_stats->rate + est_residue_cost, est_dist);
1507       if (est_rd * 0.80 > *best_est_rd) {
1508         mbmi->ref_frame[1] = ref_frame_1;
1509         continue;
1510       }
1511       const int mode_rate = rd_stats->rate;
1512       rd_stats->rate += est_residue_cost;
1513       rd_stats->dist = est_dist;
1514       rd_stats->rdcost = est_rd;
1515       if (rd_stats->rdcost < *best_est_rd) {
1516         *best_est_rd = rd_stats->rdcost;
1517         assert(sse_y >= 0);
1518         ref_skip_rd[1] = cpi->sf.inter_sf.txfm_rd_gate_level
1519                              ? RDCOST(x->rdmult, mode_rate, (sse_y << 4))
1520                              : INT64_MAX;
1521       }
1522       if (cm->current_frame.reference_mode == SINGLE_REFERENCE) {
1523         if (!is_comp_pred) {
1524           assert(curr_sse >= 0);
1525           inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
1526                                 rd_stats->rdcost, rd_stats, rd_stats_y,
1527                                 rd_stats_uv, mbmi);
1528         }
1529       } else {
1530         assert(curr_sse >= 0);
1531         inter_modes_info_push(inter_modes_info, mode_rate, curr_sse,
1532                               rd_stats->rdcost, rd_stats, rd_stats_y,
1533                               rd_stats_uv, mbmi);
1534       }
1535       mbmi->skip_txfm = 0;
1536     } else {
1537       // Perform full transform search
1538       int64_t skip_rd = INT64_MAX;
1539       int64_t skip_rdy = INT64_MAX;
1540       if (cpi->sf.inter_sf.txfm_rd_gate_level) {
1541         // Check if the mode is good enough based on skip RD
1542         int64_t sse_y = INT64_MAX;
1543         int64_t curr_sse = get_sse(cpi, x, &sse_y);
1544         skip_rd = RDCOST(x->rdmult, rd_stats->rate, curr_sse);
1545         skip_rdy = RDCOST(x->rdmult, rd_stats->rate, (sse_y << 4));
1546         int eval_txfm = check_txfm_eval(x, bsize, ref_skip_rd[0], skip_rd,
1547                                         cpi->sf.inter_sf.txfm_rd_gate_level, 0);
1548         if (!eval_txfm) continue;
1549       }
1550 
1551       // Do transform search
1552       const int mode_rate = rd_stats->rate;
1553       if (!av1_txfm_search(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv,
1554                            rd_stats->rate, ref_best_rd)) {
1555         if (rd_stats_y->rate == INT_MAX && mode_index == 0) {
1556           return INT64_MAX;
1557         }
1558         continue;
1559       }
1560       const int skip_ctx = av1_get_skip_txfm_context(xd);
1561       const int y_rate =
1562           rd_stats->skip_txfm
1563               ? x->mode_costs.skip_txfm_cost[skip_ctx][1]
1564               : (rd_stats_y->rate + x->mode_costs.skip_txfm_cost[skip_ctx][0]);
1565       this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y->dist);
1566 
1567       const int64_t curr_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
1568       if (curr_rd < ref_best_rd) {
1569         ref_best_rd = curr_rd;
1570         ref_skip_rd[0] = skip_rd;
1571         ref_skip_rd[1] = skip_rdy;
1572       }
1573       if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
1574         inter_mode_data_push(
1575             tile_data, mbmi->bsize, rd_stats->sse, rd_stats->dist,
1576             rd_stats_y->rate + rd_stats_uv->rate +
1577                 mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
1578       }
1579     }
1580 
1581     if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) {
1582       if (is_nontrans_global_motion(xd, xd->mi[0])) {
1583         mbmi->interp_filters =
1584             av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
1585       }
1586     }
1587 
1588     const int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
1589     if (mode_index == 0) {
1590       args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd;
1591     }
1592     if (mode_index == 0 || tmp_rd < best_rd) {
1593       // Update best_rd data if this is the best motion mode so far
1594       best_mbmi = *mbmi;
1595       best_rd = tmp_rd;
1596       best_rd_stats = *rd_stats;
1597       best_rd_stats_y = *rd_stats_y;
1598       best_rate_mv = tmp_rate_mv;
1599       *yrd = this_yrd;
1600       if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv;
1601       memcpy(best_blk_skip, txfm_info->blk_skip,
1602              sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
1603       av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
1604       best_xskip_txfm = mbmi->skip_txfm;
1605     }
1606   }
1607   // Update RD and mbmi stats for selected motion mode
1608   mbmi->ref_frame[1] = ref_frame_1;
1609   *rate_mv = best_rate_mv;
1610   if (best_rd == INT64_MAX || !av1_check_newmv_joint_nonzero(cm, x)) {
1611     av1_invalid_rd_stats(rd_stats);
1612     restore_dst_buf(xd, *orig_dst, num_planes);
1613     return INT64_MAX;
1614   }
1615   *mbmi = best_mbmi;
1616   *rd_stats = best_rd_stats;
1617   *rd_stats_y = best_rd_stats_y;
1618   if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv;
1619   memcpy(txfm_info->blk_skip, best_blk_skip,
1620          sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
1621   av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
1622   txfm_info->skip_txfm = best_xskip_txfm;
1623 
1624   restore_dst_buf(xd, *orig_dst, num_planes);
1625   return 0;
1626 }
1627 
// Compute the RD cost of coding this block in skip mode: build the inter
// predictor, measure the residual energy, and charge only the skip-mode
// signalling rate (skip mode codes no coefficients).
static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP *const cpi,
                            MACROBLOCK *const x, BLOCK_SIZE bsize,
                            const BUFFER_SET *const orig_dst) {
  assert(bsize < BLOCK_SIZES_ALL);
  const AV1_COMMON *cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;

  // Generate the prediction for all planes into the original dst buffers.
  av1_enc_build_inter_predictor(cm, xd, xd->mi_row, xd->mi_col, orig_dst,
                                bsize, 0, num_planes - 1);

  // Accumulate the sum of squared prediction residuals over all planes.
  int64_t sse_sum = 0;
  for (int plane_idx = 0; plane_idx < num_planes; ++plane_idx) {
    const struct macroblock_plane *const p = &x->plane[plane_idx];
    const struct macroblockd_plane *const pd = &xd->plane[plane_idx];
    const BLOCK_SIZE plane_bsize =
        get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
    const int plane_w = block_size_wide[plane_bsize];
    const int plane_h = block_size_high[plane_bsize];

    av1_subtract_plane(x, plane_bsize, plane_idx);
    // The << 4 scaling matches the distortion units used with RDCOST in the
    // rest of this file.
    const int64_t plane_sse =
        aom_sum_squares_2d_i16(p->src_diff, plane_w, plane_w, plane_h) << 4;
    sse_sum += plane_sse;
  }

  // In skip mode, distortion equals the prediction SSE and the only rate is
  // the skip-mode flag itself.
  const int skip_mode_ctx = av1_get_skip_mode_context(xd);
  rd_stats->dist = rd_stats->sse = sse_sum;
  rd_stats->rate = x->mode_costs.skip_mode_cost[skip_mode_ctx][1];
  rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);

  restore_dst_buf(xd, *orig_dst, num_planes);
  return 0;
}
1661 
1662 // Check NEARESTMV, NEARMV, GLOBALMV ref mvs for duplicate and skip the relevant
1663 // mode
check_repeat_ref_mv(const MB_MODE_INFO_EXT * mbmi_ext,int ref_idx,const MV_REFERENCE_FRAME * ref_frame,PREDICTION_MODE single_mode)1664 static INLINE int check_repeat_ref_mv(const MB_MODE_INFO_EXT *mbmi_ext,
1665                                       int ref_idx,
1666                                       const MV_REFERENCE_FRAME *ref_frame,
1667                                       PREDICTION_MODE single_mode) {
1668   const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1669   const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
1670   assert(single_mode != NEWMV);
1671   if (single_mode == NEARESTMV) {
1672     return 0;
1673   } else if (single_mode == NEARMV) {
1674     // when ref_mv_count = 0, NEARESTMV and NEARMV are same as GLOBALMV
1675     // when ref_mv_count = 1, NEARMV is same as GLOBALMV
1676     if (ref_mv_count < 2) return 1;
1677   } else if (single_mode == GLOBALMV) {
1678     // when ref_mv_count == 0, GLOBALMV is same as NEARESTMV
1679     if (ref_mv_count == 0) return 1;
1680     // when ref_mv_count == 1, NEARMV is same as GLOBALMV
1681     else if (ref_mv_count == 1)
1682       return 0;
1683 
1684     int stack_size = AOMMIN(USABLE_REF_MV_STACK_SIZE, ref_mv_count);
1685     // Check GLOBALMV is matching with any mv in ref_mv_stack
1686     for (int ref_mv_idx = 0; ref_mv_idx < stack_size; ref_mv_idx++) {
1687       int_mv this_mv;
1688 
1689       if (ref_idx == 0)
1690         this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv;
1691       else
1692         this_mv = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv;
1693 
1694       if (this_mv.as_int == mbmi_ext->global_mvs[ref_frame[ref_idx]].as_int)
1695         return 1;
1696     }
1697   }
1698   return 0;
1699 }
1700 
get_this_mv(int_mv * this_mv,PREDICTION_MODE this_mode,int ref_idx,int ref_mv_idx,int skip_repeated_ref_mv,const MV_REFERENCE_FRAME * ref_frame,const MB_MODE_INFO_EXT * mbmi_ext)1701 static INLINE int get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode,
1702                               int ref_idx, int ref_mv_idx,
1703                               int skip_repeated_ref_mv,
1704                               const MV_REFERENCE_FRAME *ref_frame,
1705                               const MB_MODE_INFO_EXT *mbmi_ext) {
1706   const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1707   assert(is_inter_singleref_mode(single_mode));
1708   if (single_mode == NEWMV) {
1709     this_mv->as_int = INVALID_MV;
1710   } else if (single_mode == GLOBALMV) {
1711     if (skip_repeated_ref_mv &&
1712         check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1713       return 0;
1714     *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1715   } else {
1716     assert(single_mode == NEARMV || single_mode == NEARESTMV);
1717     const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame);
1718     const int ref_mv_offset = single_mode == NEARESTMV ? 0 : ref_mv_idx + 1;
1719     if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) {
1720       assert(ref_mv_offset >= 0);
1721       if (ref_idx == 0) {
1722         *this_mv =
1723             mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv;
1724       } else {
1725         *this_mv =
1726             mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv;
1727       }
1728     } else {
1729       if (skip_repeated_ref_mv &&
1730           check_repeat_ref_mv(mbmi_ext, ref_idx, ref_frame, single_mode))
1731         return 0;
1732       *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]];
1733     }
1734   }
1735   return 1;
1736 }
1737 
1738 // Skip NEARESTMV and NEARMV modes based on refmv weight computed in ref mv list
1739 // population
skip_nearest_near_mv_using_refmv_weight(const MACROBLOCK * const x,const PREDICTION_MODE this_mode,const int8_t ref_frame_type)1740 static INLINE int skip_nearest_near_mv_using_refmv_weight(
1741     const MACROBLOCK *const x, const PREDICTION_MODE this_mode,
1742     const int8_t ref_frame_type) {
1743   if (this_mode != NEARESTMV && this_mode != NEARMV) return 0;
1744 
1745   const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1746   const uint16_t *const ref_mv_weight = mbmi_ext->weight[ref_frame_type];
1747   const int ref_mv_count =
1748       AOMMIN(MAX_REF_MV_SEARCH, mbmi_ext->ref_mv_count[ref_frame_type]);
1749 
1750   if (ref_mv_count == 0) return 0;
1751   // If ref mv list has atleast one nearest candidate do not prune NEARESTMV
1752   if (this_mode == NEARESTMV && ref_mv_weight[0] >= REF_CAT_LEVEL) return 0;
1753 
1754   // Count number of ref mvs populated from nearest candidates
1755   int nearest_refmv_count = 0;
1756   for (int ref_mv_idx = 0; ref_mv_idx < ref_mv_count; ref_mv_idx++) {
1757     if (ref_mv_weight[ref_mv_idx] >= REF_CAT_LEVEL) nearest_refmv_count++;
1758   }
1759 
1760   // nearest_refmv_count indicates the closeness of block motion characteristics
1761   // with respect to its spatial neighbor. Lower value of nearest_refmv_count
1762   // means less correlation with its spatial neighbors. Hence less possibility
1763   // for NEARESTMV and NEARMV modes becoming the best mode since these modes
1764   // work well for blocks that shares similar motion characteristics with its
1765   // neighbor. Thus, when nearest_refmv_count is less w.r.t ref_mv_count prune
1766   // the mode.
1767   const int prune_thresh = 1 + (ref_mv_count >= 2);
1768   if (nearest_refmv_count < prune_thresh) return 1;
1769   return 0;
1770 }
1771 
1772 // This function update the non-new mv for the current prediction mode
build_cur_mv(int_mv * cur_mv,PREDICTION_MODE this_mode,const AV1_COMMON * cm,const MACROBLOCK * x,int skip_repeated_ref_mv)1773 static INLINE int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode,
1774                                const AV1_COMMON *cm, const MACROBLOCK *x,
1775                                int skip_repeated_ref_mv) {
1776   const MACROBLOCKD *xd = &x->e_mbd;
1777   const MB_MODE_INFO *mbmi = xd->mi[0];
1778   const int is_comp_pred = has_second_ref(mbmi);
1779 
1780   int ret = 1;
1781   for (int i = 0; i < is_comp_pred + 1; ++i) {
1782     int_mv this_mv;
1783     this_mv.as_int = INVALID_MV;
1784     ret = get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx,
1785                       skip_repeated_ref_mv, mbmi->ref_frame, &x->mbmi_ext);
1786     if (!ret) return 0;
1787     const PREDICTION_MODE single_mode = get_single_mode(this_mode, i);
1788     if (single_mode == NEWMV) {
1789       const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1790       cur_mv[i] =
1791           (i == 0) ? x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
1792                          .this_mv
1793                    : x->mbmi_ext.ref_mv_stack[ref_frame_type][mbmi->ref_mv_idx]
1794                          .comp_mv;
1795     } else {
1796       ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x);
1797     }
1798   }
1799   return ret;
1800 }
1801 
get_drl_cost(const MB_MODE_INFO * mbmi,const MB_MODE_INFO_EXT * mbmi_ext,const int (* const drl_mode_cost0)[2],int8_t ref_frame_type)1802 static INLINE int get_drl_cost(const MB_MODE_INFO *mbmi,
1803                                const MB_MODE_INFO_EXT *mbmi_ext,
1804                                const int (*const drl_mode_cost0)[2],
1805                                int8_t ref_frame_type) {
1806   int cost = 0;
1807   if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) {
1808     for (int idx = 0; idx < 2; ++idx) {
1809       if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1810         uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1811         cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx];
1812         if (mbmi->ref_mv_idx == idx) return cost;
1813       }
1814     }
1815     return cost;
1816   }
1817 
1818   if (have_nearmv_in_inter_mode(mbmi->mode)) {
1819     for (int idx = 1; idx < 3; ++idx) {
1820       if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1821         uint8_t drl_ctx = av1_drl_ctx(mbmi_ext->weight[ref_frame_type], idx);
1822         cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)];
1823         if (mbmi->ref_mv_idx == (idx - 1)) return cost;
1824       }
1825     }
1826     return cost;
1827   }
1828   return cost;
1829 }
1830 
is_single_newmv_valid(const HandleInterModeArgs * const args,const MB_MODE_INFO * const mbmi,PREDICTION_MODE this_mode)1831 static INLINE int is_single_newmv_valid(const HandleInterModeArgs *const args,
1832                                         const MB_MODE_INFO *const mbmi,
1833                                         PREDICTION_MODE this_mode) {
1834   for (int ref_idx = 0; ref_idx < 2; ++ref_idx) {
1835     const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx);
1836     const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx];
1837     if (single_mode == NEWMV &&
1838         args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) {
1839       return 0;
1840     }
1841   }
1842   return 1;
1843 }
1844 
// Number of ref mv candidates (DRL indices) to evaluate for `mode`.
// Returns 1 when the mode has no DRL choice to make.
static int get_drl_refmv_count(const MACROBLOCK *const x,
                               const MV_REFERENCE_FRAME *ref_frame,
                               PREDICTION_MODE mode) {
  const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const int8_t ref_frame_type = av1_ref_frame_type(ref_frame);
  const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0;
  const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type];
  const int only_newmv = (mode == NEWMV || mode == NEW_NEWMV);

  // A DRL choice exists only when the stack holds more candidates than the
  // mode implicitly consumes.
  const int has_drl =
      (has_nearmv && ref_mv_count > 2) || (only_newmv && ref_mv_count > 1);
  if (!has_drl) return 1;
  return AOMMIN(MAX_REF_MV_SEARCH, ref_mv_count - has_nearmv);
}
1860 
1861 // Checks if particular ref_mv_idx should be pruned.
prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,const int qindex,const int ref_mv_idx)1862 static int prune_ref_mv_idx_using_qindex(const int reduce_inter_modes,
1863                                          const int qindex,
1864                                          const int ref_mv_idx) {
1865   if (reduce_inter_modes >= 3) return 1;
1866   // Q-index logic based pruning is enabled only for
1867   // reduce_inter_modes = 2.
1868   assert(reduce_inter_modes == 2);
1869   // When reduce_inter_modes=2, pruning happens as below based on q index.
1870   // For q index range between 0 and 85: prune if ref_mv_idx >= 1.
1871   // For q index range between 86 and 170: prune if ref_mv_idx == 2.
1872   // For q index range between 171 and 255: no pruning.
1873   const int min_prune_ref_mv_idx = (qindex * 3 / QINDEX_RANGE) + 1;
1874   return (ref_mv_idx >= min_prune_ref_mv_idx);
1875 }
1876 
1877 // Whether this reference motion vector can be skipped, based on initial
1878 // heuristics.
ref_mv_idx_early_breakout(const SPEED_FEATURES * const sf,const RefFrameDistanceInfo * const ref_frame_dist_info,MACROBLOCK * x,const HandleInterModeArgs * const args,int64_t ref_best_rd,int ref_mv_idx)1879 static bool ref_mv_idx_early_breakout(
1880     const SPEED_FEATURES *const sf,
1881     const RefFrameDistanceInfo *const ref_frame_dist_info, MACROBLOCK *x,
1882     const HandleInterModeArgs *const args, int64_t ref_best_rd,
1883     int ref_mv_idx) {
1884   MACROBLOCKD *xd = &x->e_mbd;
1885   MB_MODE_INFO *mbmi = xd->mi[0];
1886   const MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1887   const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1888   const int is_comp_pred = has_second_ref(mbmi);
1889   if (sf->inter_sf.reduce_inter_modes && ref_mv_idx > 0) {
1890     if (mbmi->ref_frame[0] == LAST2_FRAME ||
1891         mbmi->ref_frame[0] == LAST3_FRAME ||
1892         mbmi->ref_frame[1] == LAST2_FRAME ||
1893         mbmi->ref_frame[1] == LAST3_FRAME) {
1894       const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
1895       if (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
1896           REF_CAT_LEVEL) {
1897         return true;
1898       }
1899     }
1900     // TODO(any): Experiment with reduce_inter_modes for compound prediction
1901     if (sf->inter_sf.reduce_inter_modes >= 2 && !is_comp_pred &&
1902         have_newmv_in_inter_mode(mbmi->mode)) {
1903       if (mbmi->ref_frame[0] != ref_frame_dist_info->nearest_past_ref &&
1904           mbmi->ref_frame[0] != ref_frame_dist_info->nearest_future_ref) {
1905         const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0;
1906         const int do_prune = prune_ref_mv_idx_using_qindex(
1907             sf->inter_sf.reduce_inter_modes, x->qindex, ref_mv_idx);
1908         if (do_prune &&
1909             (mbmi_ext->weight[ref_frame_type][ref_mv_idx + has_nearmv] <
1910              REF_CAT_LEVEL)) {
1911           return true;
1912         }
1913       }
1914     }
1915   }
1916 
1917   mbmi->ref_mv_idx = ref_mv_idx;
1918   if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, mbmi->mode))) {
1919     return true;
1920   }
1921   size_t est_rd_rate = args->ref_frame_cost + args->single_comp_cost;
1922   const int drl_cost = get_drl_cost(
1923       mbmi, mbmi_ext, x->mode_costs.drl_mode_cost0, ref_frame_type);
1924   est_rd_rate += drl_cost;
1925   if (RDCOST(x->rdmult, est_rd_rate, 0) > ref_best_rd &&
1926       mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
1927     return true;
1928   }
1929   return false;
1930 }
1931 
1932 // Compute the estimated RD cost for the motion vector with simple translation.
simple_translation_pred_rd(AV1_COMP * const cpi,MACROBLOCK * x,RD_STATS * rd_stats,HandleInterModeArgs * args,int ref_mv_idx,int64_t ref_best_rd,BLOCK_SIZE bsize)1933 static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
1934                                           RD_STATS *rd_stats,
1935                                           HandleInterModeArgs *args,
1936                                           int ref_mv_idx, int64_t ref_best_rd,
1937                                           BLOCK_SIZE bsize) {
1938   MACROBLOCKD *xd = &x->e_mbd;
1939   MB_MODE_INFO *mbmi = xd->mi[0];
1940   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
1941   const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1942   const AV1_COMMON *cm = &cpi->common;
1943   const int is_comp_pred = has_second_ref(mbmi);
1944   const ModeCosts *mode_costs = &x->mode_costs;
1945 
1946   struct macroblockd_plane *p = xd->plane;
1947   const BUFFER_SET orig_dst = {
1948     { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
1949     { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
1950   };
1951   av1_init_rd_stats(rd_stats);
1952 
1953   mbmi->interinter_comp.type = COMPOUND_AVERAGE;
1954   mbmi->comp_group_idx = 0;
1955   mbmi->compound_idx = 1;
1956   if (mbmi->ref_frame[1] == INTRA_FRAME) {
1957     mbmi->ref_frame[1] = NONE_FRAME;
1958   }
1959   int16_t mode_ctx =
1960       av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
1961 
1962   mbmi->num_proj_ref = 0;
1963   mbmi->motion_mode = SIMPLE_TRANSLATION;
1964   mbmi->ref_mv_idx = ref_mv_idx;
1965 
1966   rd_stats->rate += args->ref_frame_cost + args->single_comp_cost;
1967   const int drl_cost =
1968       get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
1969   rd_stats->rate += drl_cost;
1970 
1971   int_mv cur_mv[2];
1972   if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
1973     return INT64_MAX;
1974   }
1975   assert(have_nearmv_in_inter_mode(mbmi->mode));
1976   for (int i = 0; i < is_comp_pred + 1; ++i) {
1977     mbmi->mv[i].as_int = cur_mv[i].as_int;
1978   }
1979   const int ref_mv_cost = cost_mv_ref(mode_costs, mbmi->mode, mode_ctx);
1980   rd_stats->rate += ref_mv_cost;
1981 
1982   if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd) {
1983     return INT64_MAX;
1984   }
1985 
1986   mbmi->motion_mode = SIMPLE_TRANSLATION;
1987   mbmi->num_proj_ref = 0;
1988   if (is_comp_pred) {
1989     // Only compound_average
1990     mbmi->interinter_comp.type = COMPOUND_AVERAGE;
1991     mbmi->comp_group_idx = 0;
1992     mbmi->compound_idx = 1;
1993   }
1994   set_default_interp_filters(mbmi, cm->features.interp_filter);
1995 
1996   const int mi_row = xd->mi_row;
1997   const int mi_col = xd->mi_col;
1998   av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize,
1999                                 AOM_PLANE_Y, AOM_PLANE_Y);
2000   int est_rate;
2001   int64_t est_dist;
2002   model_rd_sb_fn[MODELRD_CURVFIT](cpi, bsize, x, xd, 0, 0, &est_rate, &est_dist,
2003                                   NULL, NULL, NULL, NULL, NULL);
2004   return RDCOST(x->rdmult, rd_stats->rate + est_rate, est_dist);
2005 }
2006 
2007 // Represents a set of integers, from 0 to sizeof(int) * 8, as bits in
2008 // an integer. 0 for the i-th bit means that integer is excluded, 1 means
2009 // it is included.
mask_set_bit(int * mask,int index)2010 static INLINE void mask_set_bit(int *mask, int index) { *mask |= (1 << index); }
2011 
mask_check_bit(int mask,int index)2012 static INLINE bool mask_check_bit(int mask, int index) {
2013   return (mask >> index) & 0x1;
2014 }
2015 
2016 // Before performing the full MV search in handle_inter_mode, do a simple
2017 // translation search and see if we can eliminate any motion vectors.
2018 // Returns an integer where, if the i-th bit is set, it means that the i-th
2019 // motion vector should be searched. This is only set for NEAR_MV.
ref_mv_idx_to_search(AV1_COMP * const cpi,MACROBLOCK * x,RD_STATS * rd_stats,HandleInterModeArgs * const args,int64_t ref_best_rd,BLOCK_SIZE bsize,const int ref_set)2020 static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
2021                                 RD_STATS *rd_stats,
2022                                 HandleInterModeArgs *const args,
2023                                 int64_t ref_best_rd, BLOCK_SIZE bsize,
2024                                 const int ref_set) {
2025   AV1_COMMON *const cm = &cpi->common;
2026   const MACROBLOCKD *const xd = &x->e_mbd;
2027   const MB_MODE_INFO *const mbmi = xd->mi[0];
2028   const PREDICTION_MODE this_mode = mbmi->mode;
2029 
2030   // Only search indices if they have some chance of being good.
2031   int good_indices = 0;
2032   for (int i = 0; i < ref_set; ++i) {
2033     if (ref_mv_idx_early_breakout(&cpi->sf, &cpi->ref_frame_dist_info, x, args,
2034                                   ref_best_rd, i)) {
2035       continue;
2036     }
2037     mask_set_bit(&good_indices, i);
2038   }
2039 
2040   // Only prune in NEARMV mode, if the speed feature is set, and the block size
2041   // is large enough. If these conditions are not met, return all good indices
2042   // found so far.
2043   if (!cpi->sf.inter_sf.prune_mode_search_simple_translation)
2044     return good_indices;
2045   if (!have_nearmv_in_inter_mode(this_mode)) return good_indices;
2046   if (num_pels_log2_lookup[bsize] <= 6) return good_indices;
2047   // Do not prune when there is internal resizing. TODO(elliottk) fix this
2048   // so b/2384 can be resolved.
2049   if (av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[0])) ||
2050       (mbmi->ref_frame[1] > 0 &&
2051        av1_is_scaled(get_ref_scale_factors(cm, mbmi->ref_frame[1])))) {
2052     return good_indices;
2053   }
2054 
2055   // Calculate the RD cost for the motion vectors using simple translation.
2056   int64_t idx_rdcost[] = { INT64_MAX, INT64_MAX, INT64_MAX };
2057   for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
2058     // If this index is bad, ignore it.
2059     if (!mask_check_bit(good_indices, ref_mv_idx)) {
2060       continue;
2061     }
2062     idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
2063         cpi, x, rd_stats, args, ref_mv_idx, ref_best_rd, bsize);
2064   }
2065   // Find the index with the best RD cost.
2066   int best_idx = 0;
2067   for (int i = 1; i < MAX_REF_MV_SEARCH; ++i) {
2068     if (idx_rdcost[i] < idx_rdcost[best_idx]) {
2069       best_idx = i;
2070     }
2071   }
2072   // Only include indices that are good and within a % of the best.
2073   const double dth = has_second_ref(mbmi) ? 1.05 : 1.001;
2074   // If the simple translation cost is not within this multiple of the
2075   // best RD, skip it. Note that the cutoff is derived experimentally.
2076   const double ref_dth = 5;
2077   int result = 0;
2078   for (int i = 0; i < ref_set; ++i) {
2079     if (mask_check_bit(good_indices, i) &&
2080         (1.0 * idx_rdcost[i]) / idx_rdcost[best_idx] < dth &&
2081         (1.0 * idx_rdcost[i]) / ref_best_rd < ref_dth) {
2082       mask_set_bit(&result, i);
2083     }
2084   }
2085   return result;
2086 }
2087 
/*!\brief Motion mode information for inter mode search speedup.
 *
 * Used in a speed feature to search motion modes other than
 * SIMPLE_TRANSLATION only on winning candidates.
 */
typedef struct motion_mode_candidate {
  /*!
   * Mode info for the motion mode candidate.
   */
  MB_MODE_INFO mbmi;
  /*!
   * Rate describing the cost of the motion vectors for this candidate.
   */
  int rate_mv;
  /*!
   * Rate before motion mode search and transform coding is applied.
   */
  int rate2_nocoeff;
  /*!
   * An integer value 0 or 1 which indicates whether or not to skip the motion
   * mode search and default to SIMPLE_TRANSLATION as a speed feature for this
   * candidate.
   */
  int skip_motion_mode;
  /*!
   * Total RD cost for this candidate.
   */
  int64_t rd_cost;
} motion_mode_candidate;
2117 
/*!\cond */
typedef struct motion_mode_best_st_candidate {
  // Winning candidates from the SIMPLE_TRANSLATION search that will be
  // revisited with other motion modes (see motion_mode_candidate).
  motion_mode_candidate motion_mode_cand[MAX_WINNER_MOTION_MODES];
  // Number of valid entries in motion_mode_cand.
  int num_motion_mode_cand;
} motion_mode_best_st_candidate;
2123 
2124 // Checks if the current reference frame matches with neighbouring block's
2125 // (top/left) reference frames
ref_match_found_in_nb_blocks(MB_MODE_INFO * cur_mbmi,MB_MODE_INFO * nb_mbmi)2126 static AOM_INLINE int ref_match_found_in_nb_blocks(MB_MODE_INFO *cur_mbmi,
2127                                                    MB_MODE_INFO *nb_mbmi) {
2128   MV_REFERENCE_FRAME nb_ref_frames[2] = { nb_mbmi->ref_frame[0],
2129                                           nb_mbmi->ref_frame[1] };
2130   MV_REFERENCE_FRAME cur_ref_frames[2] = { cur_mbmi->ref_frame[0],
2131                                            cur_mbmi->ref_frame[1] };
2132   const int is_cur_comp_pred = has_second_ref(cur_mbmi);
2133   int match_found = 0;
2134 
2135   for (int i = 0; i < (is_cur_comp_pred + 1); i++) {
2136     if ((cur_ref_frames[i] == nb_ref_frames[0]) ||
2137         (cur_ref_frames[i] == nb_ref_frames[1]))
2138       match_found = 1;
2139   }
2140   return match_found;
2141 }
2142 
// Returns 1 if any inter-coded block in the row directly above shares a
// reference frame with the current block; also returns 1 when the above row
// is unavailable (so callers do not prune in that case). Returns 0 otherwise.
static AOM_INLINE int find_ref_match_in_above_nbs(const int total_mi_cols,
                                                  MACROBLOCKD *xd) {
  // No neighbour row to inspect: report a match so no pruning is triggered.
  if (!xd->up_available) return 1;
  const int mi_col = xd->mi_col;
  MB_MODE_INFO **cur_mbmi = xd->mi;
  // prev_row_mi points into the mi array, starting at the beginning of the
  // previous row.
  MB_MODE_INFO **prev_row_mi = xd->mi - mi_col - 1 * xd->mi_stride;
  const int end_col = AOMMIN(mi_col + xd->width, total_mi_cols);
  uint8_t mi_step;
  // Walk the above row one neighbour block at a time, advancing by each
  // neighbour's width in mi units.
  for (int above_mi_col = mi_col; above_mi_col < end_col;
       above_mi_col += mi_step) {
    MB_MODE_INFO **above_mi = prev_row_mi + above_mi_col;
    mi_step = mi_size_wide[above_mi[0]->bsize];
    int match_found = 0;
    // Only inter blocks carry reference frames worth comparing.
    if (is_inter_block(*above_mi))
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *above_mi);
    if (match_found) return 1;
  }
  return 0;
}
2164 
// Returns 1 if any inter-coded block in the column directly to the left
// shares a reference frame with the current block; also returns 1 when the
// left column is unavailable (so callers do not prune). Returns 0 otherwise.
static AOM_INLINE int find_ref_match_in_left_nbs(const int total_mi_rows,
                                                 MACROBLOCKD *xd) {
  // No neighbour column to inspect: report a match so no pruning happens.
  if (!xd->left_available) return 1;
  const int mi_row = xd->mi_row;
  MB_MODE_INFO **cur_mbmi = xd->mi;
  // prev_col_mi points into the mi array, starting at the top of the
  // previous column
  MB_MODE_INFO **prev_col_mi = xd->mi - 1 - mi_row * xd->mi_stride;
  const int end_row = AOMMIN(mi_row + xd->height, total_mi_rows);
  uint8_t mi_step;
  // Walk the left column one neighbour block at a time, advancing by each
  // neighbour's height in mi units.
  for (int left_mi_row = mi_row; left_mi_row < end_row;
       left_mi_row += mi_step) {
    MB_MODE_INFO **left_mi = prev_col_mi + left_mi_row * xd->mi_stride;
    mi_step = mi_size_high[left_mi[0]->bsize];
    int match_found = 0;
    // Only inter blocks carry reference frames worth comparing.
    if (is_inter_block(*left_mi))
      match_found = ref_match_found_in_nb_blocks(*cur_mbmi, *left_mi);
    if (match_found) return 1;
  }
  return 0;
}
2186 /*!\endcond */
2187 
/*! \brief Struct used to hold TPL data to
 * narrow down parts of the inter mode search.
 */
typedef struct {
  /*!
   * The best (minimum) inter cost out of all of the reference frames.
   */
  int64_t best_inter_cost;
  /*!
   * The accumulated inter cost for each reference frame, indexed by
   * ref_frame - 1.
   */
  int64_t ref_inter_cost[INTER_REFS_PER_FRAME];
} PruneInfoFromTpl;
2201 
#if !CONFIG_REALTIME_ONLY
// TODO(Remya): Check if get_tpl_stats_b() can be reused
// Accumulates, per reference frame, the TPL predicted error (inter cost) over
// all TPL stat blocks covered by the current coding block, and records the
// minimum such cost among the valid references in
// inter_cost_info_from_tpl->best_inter_cost.
static AOM_INLINE void get_block_level_tpl_stats(
    AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
    PruneInfoFromTpl *inter_cost_info_from_tpl) {
  AV1_COMMON *const cm = &cpi->common;

  assert(IMPLIES(cpi->ppi->gf_group.size > 0,
                 cpi->gf_frame_index < cpi->ppi->gf_group.size));
  const int tpl_idx = cpi->gf_frame_index;
  TplParams *const tpl_data = &cpi->ppi->tpl_data;
  // Nothing to do if TPL stats were not collected for this frame.
  if (!av1_tpl_stats_ready(tpl_data, tpl_idx)) return;
  const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
  const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int mi_wide = mi_size_wide[bsize];
  const int mi_high = mi_size_high[bsize];
  const int tpl_stride = tpl_frame->stride;
  const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
  // Column coordinates are converted from coded to superres mi units via
  // coded_to_superres_mi so indexing matches the TPL stats layout.
  const int mi_col_sr =
      coded_to_superres_mi(mi_col, cm->superres_scale_denominator);
  const int mi_col_end_sr =
      coded_to_superres_mi(mi_col + mi_wide, cm->superres_scale_denominator);
  const int mi_cols_sr = av1_pixels_to_mi(cm->superres_upscaled_width);

  const int row_step = step;
  const int col_step_sr =
      coded_to_superres_mi(step, cm->superres_scale_denominator);
  // Iterate over the TPL stat blocks covered by this coding block, clamped
  // to the frame boundaries.
  for (int row = mi_row; row < AOMMIN(mi_row + mi_high, cm->mi_params.mi_rows);
       row += row_step) {
    for (int col = mi_col_sr; col < AOMMIN(mi_col_end_sr, mi_cols_sr);
         col += col_step_sr) {
      const TplDepStats *this_stats = &tpl_stats[av1_tpl_ptr_pos(
          row, col, tpl_stride, tpl_data->tpl_stats_block_mis_log2)];

      // Sums up the inter cost of corresponding ref frames
      for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx] +=
            this_stats->pred_error[ref_idx];
      }
    }
  }

  // Computes the best inter cost (minimum inter_cost)
  int64_t best_inter_cost = INT64_MAX;
  for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ref_idx++) {
    const int64_t cur_inter_cost =
        inter_cost_info_from_tpl->ref_inter_cost[ref_idx];
    // For invalid ref frames, cur_inter_cost = 0 and has to be handled while
    // calculating the minimum inter_cost
    if (cur_inter_cost != 0 && (cur_inter_cost < best_inter_cost) &&
        valid_refs[ref_idx])
      best_inter_cost = cur_inter_cost;
  }
  inter_cost_info_from_tpl->best_inter_cost = best_inter_cost;
}
#endif
2258 
prune_modes_based_on_tpl_stats(PruneInfoFromTpl * inter_cost_info_from_tpl,const int * refs,int ref_mv_idx,const PREDICTION_MODE this_mode,int prune_mode_level)2259 static AOM_INLINE int prune_modes_based_on_tpl_stats(
2260     PruneInfoFromTpl *inter_cost_info_from_tpl, const int *refs, int ref_mv_idx,
2261     const PREDICTION_MODE this_mode, int prune_mode_level) {
2262   const int have_newmv = have_newmv_in_inter_mode(this_mode);
2263   if ((prune_mode_level < 2) && have_newmv) return 0;
2264 
2265   const int64_t best_inter_cost = inter_cost_info_from_tpl->best_inter_cost;
2266   if (best_inter_cost == INT64_MAX) return 0;
2267 
2268   const int prune_level = prune_mode_level - 1;
2269   int64_t cur_inter_cost;
2270 
2271   const int is_globalmv =
2272       (this_mode == GLOBALMV) || (this_mode == GLOBAL_GLOBALMV);
2273   const int prune_index = is_globalmv ? MAX_REF_MV_SEARCH : ref_mv_idx;
2274 
2275   // Thresholds used for pruning:
2276   // Lower value indicates aggressive pruning and higher value indicates
2277   // conservative pruning which is set based on ref_mv_idx and speed feature.
2278   // 'prune_index' 0, 1, 2 corresponds to ref_mv indices 0, 1 and 2. prune_index
2279   // 3 corresponds to GLOBALMV/GLOBAL_GLOBALMV
2280   static const int tpl_inter_mode_prune_mul_factor[3][MAX_REF_MV_SEARCH + 1] = {
2281     { 6, 6, 6, 4 }, { 6, 4, 4, 4 }, { 5, 4, 4, 4 }
2282   };
2283 
2284   const int is_comp_pred = (refs[1] > INTRA_FRAME);
2285   if (!is_comp_pred) {
2286     cur_inter_cost = inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2287   } else {
2288     const int64_t inter_cost_ref0 =
2289         inter_cost_info_from_tpl->ref_inter_cost[refs[0] - 1];
2290     const int64_t inter_cost_ref1 =
2291         inter_cost_info_from_tpl->ref_inter_cost[refs[1] - 1];
2292     // Choose maximum inter_cost among inter_cost_ref0 and inter_cost_ref1 for
2293     // more aggressive pruning
2294     cur_inter_cost = AOMMAX(inter_cost_ref0, inter_cost_ref1);
2295   }
2296 
2297   // Prune the mode if cur_inter_cost is greater than threshold times
2298   // best_inter_cost
2299   if (cur_inter_cost >
2300       ((tpl_inter_mode_prune_mul_factor[prune_level][prune_index] *
2301         best_inter_cost) >>
2302        2))
2303     return 1;
2304   return 0;
2305 }
2306 
2307 /*!\brief High level function to select parameters for compound mode.
2308  *
2309  * \ingroup inter_mode_search
2310  * The main search functionality is done in the call to av1_compound_type_rd().
2311  *
2312  * \param[in]     cpi               Top-level encoder structure.
2313  * \param[in]     x                 Pointer to struct holding all the data for
2314  *                                  the current macroblock.
2315  * \param[in]     args              HandleInterModeArgs struct holding
2316  *                                  miscellaneous arguments for inter mode
2317  *                                  search. See the documentation for this
2318  *                                  struct for a description of each member.
2319  * \param[in]     ref_best_rd       Best RD found so far for this block.
2320  *                                  It is used for early termination of this
2321  *                                  search if the RD exceeds this value.
2322  * \param[in,out] cur_mv            Current motion vector.
2323  * \param[in]     bsize             Current block size.
2324  * \param[in,out] compmode_interinter_cost  RD of the selected interinter
2325                                     compound mode.
2326  * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2327  *                                  allocated buffers for the compound
2328  *                                  predictors and masks in the compound type
2329  *                                  search.
2330  * \param[in,out] orig_dst          A prediction buffer to hold a computed
2331  *                                  prediction. This will eventually hold the
2332  *                                  final prediction, and the tmp_dst info will
2333  *                                  be copied here.
2334  * \param[in]     tmp_dst           A temporary prediction buffer to hold a
2335  *                                  computed prediction.
2336  * \param[in,out] rate_mv           The rate associated with the motion vectors.
2337  *                                  This will be modified if a motion search is
2338  *                                  done in the motion mode search.
2339  * \param[in,out] rd_stats          Struct to keep track of the overall RD
2340  *                                  information.
2341  * \param[in,out] skip_rd           An array of length 2 where skip_rd[0] is the
2342  *                                  best total RD for a skip mode so far, and
2343  *                                  skip_rd[1] is the best RD for a skip mode so
2344  *                                  far in luma. This is used as a speed feature
2345  *                                  to skip the transform search if the computed
2346  *                                  skip RD for the current mode is not better
2347  *                                  than the best skip_rd so far.
2348  * \param[in,out] skip_build_pred   Indicates whether or not to build the inter
2349  *                                  predictor. If this is 0, the inter predictor
2350  *                                  has already been built and thus we can avoid
2351  *                                  repeating computation.
2352  * \return Returns 1 if this mode is worse than one already seen and 0 if it is
2353  * a viable candidate.
2354  */
static int process_compound_inter_mode(
    AV1_COMP *const cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    int64_t ref_best_rd, int_mv *cur_mv, BLOCK_SIZE bsize,
    int *compmode_interinter_cost, const CompoundTypeRdBuffers *rd_buffers,
    const BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, int *rate_mv,
    RD_STATS *rd_stats, int64_t *skip_rd, int *skip_build_pred) {
  MACROBLOCKD *xd = &x->e_mbd;
  MB_MODE_INFO *mbmi = xd->mi[0];
  const AV1_COMMON *cm = &cpi->common;
  // Masked compound types are only considered when both the block size and
  // the sequence header permit them.
  const int masked_compound_used = is_any_masked_compound_used(bsize) &&
                                   cm->seq_params->enable_masked_compound;
  // Bitmask of compound types to evaluate in av1_compound_type_rd().
  int mode_search_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) |
                         (1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);

  const int num_planes = av1_num_planes(cm);
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  int is_luma_interp_done = 0;
  set_default_interp_filters(mbmi, cm->features.interp_filter);

  int64_t best_rd_compound;
  int64_t rd_thresh;
  const int comp_type_rd_shift = COMP_TYPE_RD_THRESH_SHIFT;
  const int comp_type_rd_scale = COMP_TYPE_RD_THRESH_SCALE;
  // Derive the early-exit threshold for the compound type search from the
  // best RD seen so far.
  rd_thresh = get_rd_thresh_from_best_rd(ref_best_rd, (1 << comp_type_rd_shift),
                                         comp_type_rd_scale);
  // Select compound type and any parameters related to that type
  // (for example, the mask parameters if it is a masked mode) and compute
  // the RD
  *compmode_interinter_cost = av1_compound_type_rd(
      cpi, x, args, bsize, cur_mv, mode_search_mask, masked_compound_used,
      orig_dst, tmp_dst, rd_buffers, rate_mv, &best_rd_compound, rd_stats,
      ref_best_rd, skip_rd[1], &is_luma_interp_done, rd_thresh);
  // If the scaled compound RD already exceeds the best RD seen so far,
  // restore the original prediction buffer and report the mode as not viable.
  if (ref_best_rd < INT64_MAX &&
      (best_rd_compound >> comp_type_rd_shift) * comp_type_rd_scale >
          ref_best_rd) {
    restore_dst_buf(xd, *orig_dst, num_planes);
    return 1;
  }

  // Build only uv predictor for COMPOUND_AVERAGE.
  // Note there is no need to call av1_enc_build_inter_predictor
  // for luma if COMPOUND_AVERAGE is selected because it is the first
  // candidate in av1_compound_type_rd, which means it used the dst_buf
  // rather than the tmp_buf.
  if (mbmi->interinter_comp.type == COMPOUND_AVERAGE && is_luma_interp_done) {
    if (num_planes > 1) {
      av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst, bsize,
                                    AOM_PLANE_U, num_planes - 1);
    }
    *skip_build_pred = 1;
  }
  return 0;
}
2409 
2410 // Speed feature to prune out MVs that are similar to previous MVs if they
2411 // don't achieve the best RD advantage.
prune_ref_mv_idx_search(int ref_mv_idx,int best_ref_mv_idx,int_mv save_mv[MAX_REF_MV_SEARCH-1][2],MB_MODE_INFO * mbmi,int pruning_factor)2412 static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
2413                                    int_mv save_mv[MAX_REF_MV_SEARCH - 1][2],
2414                                    MB_MODE_INFO *mbmi, int pruning_factor) {
2415   int i;
2416   const int is_comp_pred = has_second_ref(mbmi);
2417   const int thr = (1 + is_comp_pred) << (pruning_factor + 1);
2418 
2419   // Skip the evaluation if an MV match is found.
2420   if (ref_mv_idx > 0) {
2421     for (int idx = 0; idx < ref_mv_idx; ++idx) {
2422       if (save_mv[idx][0].as_int == INVALID_MV) continue;
2423 
2424       int mv_diff = 0;
2425       for (i = 0; i < 1 + is_comp_pred; ++i) {
2426         mv_diff += abs(save_mv[idx][i].as_mv.row - mbmi->mv[i].as_mv.row) +
2427                    abs(save_mv[idx][i].as_mv.col - mbmi->mv[i].as_mv.col);
2428       }
2429 
2430       // If this mode is not the best one, and current MV is similar to
2431       // previous stored MV, terminate this ref_mv_idx evaluation.
2432       if (best_ref_mv_idx == -1 && mv_diff <= thr) return 1;
2433     }
2434   }
2435 
2436   if (ref_mv_idx < MAX_REF_MV_SEARCH - 1) {
2437     for (i = 0; i < is_comp_pred + 1; ++i)
2438       save_mv[ref_mv_idx][i].as_int = mbmi->mv[i].as_int;
2439   }
2440 
2441   return 0;
2442 }
2443 
2444 /*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
2445  *
2446  * \ingroup inter_mode_search
2447  *
2448  * Compares the sse of zero mv and the best sse found in single new_mv. If the
2449  * sse of the zero_mv is higher, returns 1 to signal zero_mv can be skipped.
2450  * Else returns 0.
2451  *
2452  * Note that the sse of here comes from single_motion_search. So it is
2453  * interpolated with the filter in motion search, not the actual interpolation
2454  * filter used in encoding.
2455  *
2456  * \param[in]     fn_ptr            A table of function pointers to compute SSE.
2457  * \param[in]     x                 Pointer to struct holding all the data for
2458  *                                  the current macroblock.
2459  * \param[in]     bsize             The current block_size.
2460  * \param[in]     args              The args to handle_inter_mode, used to track
2461  *                                  the best SSE.
2462  * \return Returns 1 if zero_mv is pruned, 0 otherwise.
2463  */
prune_zero_mv_with_sse(const aom_variance_fn_ptr_t * fn_ptr,const MACROBLOCK * x,BLOCK_SIZE bsize,const HandleInterModeArgs * args)2464 static AOM_INLINE int prune_zero_mv_with_sse(
2465     const aom_variance_fn_ptr_t *fn_ptr, const MACROBLOCK *x, BLOCK_SIZE bsize,
2466     const HandleInterModeArgs *args) {
2467   const MACROBLOCKD *xd = &x->e_mbd;
2468   const MB_MODE_INFO *mbmi = xd->mi[0];
2469 
2470   const int is_comp_pred = has_second_ref(mbmi);
2471   const MV_REFERENCE_FRAME *refs = mbmi->ref_frame;
2472 
2473   // Check that the global mv is the same as ZEROMV
2474   assert(mbmi->mv[0].as_int == 0);
2475   assert(IMPLIES(is_comp_pred, mbmi->mv[0].as_int == 0));
2476   assert(xd->global_motion[refs[0]].wmtype == TRANSLATION ||
2477          xd->global_motion[refs[0]].wmtype == IDENTITY);
2478 
2479   // Don't prune if we have invalid data
2480   for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2481     assert(mbmi->mv[0].as_int == 0);
2482     if (args->best_single_sse_in_refs[refs[idx]] == INT32_MAX) {
2483       return 0;
2484     }
2485   }
2486 
2487   // Sum up the sse of ZEROMV and best NEWMV
2488   unsigned int this_sse_sum = 0;
2489   unsigned int best_sse_sum = 0;
2490   for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
2491     const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
2492     const struct macroblockd_plane *pd = xd->plane;
2493     const struct buf_2d *src_buf = &p->src;
2494     const struct buf_2d *ref_buf = &pd->pre[idx];
2495     const uint8_t *src = src_buf->buf;
2496     const uint8_t *ref = ref_buf->buf;
2497     const int src_stride = src_buf->stride;
2498     const int ref_stride = ref_buf->stride;
2499 
2500     unsigned int this_sse;
2501     fn_ptr[bsize].vf(ref, ref_stride, src, src_stride, &this_sse);
2502     this_sse_sum += this_sse;
2503 
2504     const unsigned int best_sse = args->best_single_sse_in_refs[refs[idx]];
2505     best_sse_sum += best_sse;
2506   }
2507   if (this_sse_sum > best_sse_sum) {
2508     return 1;
2509   }
2510 
2511   return 0;
2512 }
2513 
2514 /*!\brief AV1 inter mode RD computation
2515  *
2516  * \ingroup inter_mode_search
2517  * Do the RD search for a given inter mode and compute all information relevant
2518  * to the input mode. It will compute the best MV,
2519  * compound parameters (if the mode is a compound mode) and interpolation filter
2520  * parameters.
2521  *
2522  * \param[in]     cpi               Top-level encoder structure.
2523  * \param[in]     tile_data         Pointer to struct holding adaptive
2524  *                                  data/contexts/models for the tile during
2525  *                                  encoding.
2526  * \param[in]     x                 Pointer to structure holding all the data
2527  *                                  for the current macroblock.
2528  * \param[in]     bsize             Current block size.
2529  * \param[in,out] rd_stats          Struct to keep track of the overall RD
2530  *                                  information.
2531  * \param[in,out] rd_stats_y        Struct to keep track of the RD information
2532  *                                  for only the Y plane.
2533  * \param[in,out] rd_stats_uv       Struct to keep track of the RD information
2534  *                                  for only the UV planes.
2535  * \param[in]     args              HandleInterModeArgs struct holding
2536  *                                  miscellaneous arguments for inter mode
2537  *                                  search. See the documentation for this
2538  *                                  struct for a description of each member.
2539  * \param[in]     ref_best_rd       Best RD found so far for this block.
2540  *                                  It is used for early termination of this
2541  *                                  search if the RD exceeds this value.
2542  * \param[in]     tmp_buf           Temporary buffer used to hold predictors
2543  *                                  built in this search.
2544  * \param[in,out] rd_buffers        CompoundTypeRdBuffers struct to hold all
2545  *                                  allocated buffers for the compound
2546  *                                  predictors and masks in the compound type
2547  *                                  search.
2548  * \param[in,out] best_est_rd       Estimated RD for motion mode search if
2549  *                                  do_tx_search (see below) is 0.
2550  * \param[in]     do_tx_search      Parameter to indicate whether or not to do
2551  *                                  a full transform search. This will compute
2552  *                                  an estimated RD for the modes without the
2553  *                                  transform search and later perform the full
2554  *                                  transform search on the best candidates.
2555  * \param[in,out] inter_modes_info  InterModesInfo struct to hold inter mode
2556  *                                  information to perform a full transform
2557  *                                  search only on winning candidates searched
2558  *                                  with an estimate for transform coding RD.
2559  * \param[in,out] motion_mode_cand  A motion_mode_candidate struct to store
2560  *                                  motion mode information used in a speed
2561  *                                  feature to search motion modes other than
2562  *                                  SIMPLE_TRANSLATION only on winning
2563  *                                  candidates.
2564  * \param[in,out] skip_rd           A length 2 array, where skip_rd[0] is the
2565  *                                  best total RD for a skip mode so far, and
2566  *                                  skip_rd[1] is the best RD for a skip mode so
2567  *                                  far in luma. This is used as a speed feature
2568  *                                  to skip the transform search if the computed
2569  *                                  skip RD for the current mode is not better
2570  *                                  than the best skip_rd so far.
2571  * \param[in]     inter_cost_info_from_tpl A PruneInfoFromTpl struct used to
2572  *                                         narrow down the search based on data
2573  *                                         collected in the TPL model.
2574  * \param[out]    yrd               Stores the rdcost corresponding to encoding
2575  *                                  the luma plane.
2576  *
2577  * \return The RD cost for the mode being searched.
2578  */
handle_inter_mode(AV1_COMP * const cpi,TileDataEnc * tile_data,MACROBLOCK * x,BLOCK_SIZE bsize,RD_STATS * rd_stats,RD_STATS * rd_stats_y,RD_STATS * rd_stats_uv,HandleInterModeArgs * args,int64_t ref_best_rd,uint8_t * const tmp_buf,const CompoundTypeRdBuffers * rd_buffers,int64_t * best_est_rd,const int do_tx_search,InterModesInfo * inter_modes_info,motion_mode_candidate * motion_mode_cand,int64_t * skip_rd,PruneInfoFromTpl * inter_cost_info_from_tpl,int64_t * yrd)2579 static int64_t handle_inter_mode(
2580     AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *x,
2581     BLOCK_SIZE bsize, RD_STATS *rd_stats, RD_STATS *rd_stats_y,
2582     RD_STATS *rd_stats_uv, HandleInterModeArgs *args, int64_t ref_best_rd,
2583     uint8_t *const tmp_buf, const CompoundTypeRdBuffers *rd_buffers,
2584     int64_t *best_est_rd, const int do_tx_search,
2585     InterModesInfo *inter_modes_info, motion_mode_candidate *motion_mode_cand,
2586     int64_t *skip_rd, PruneInfoFromTpl *inter_cost_info_from_tpl,
2587     int64_t *yrd) {
2588   const AV1_COMMON *cm = &cpi->common;
2589   const int num_planes = av1_num_planes(cm);
2590   MACROBLOCKD *xd = &x->e_mbd;
2591   MB_MODE_INFO *mbmi = xd->mi[0];
2592   MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
2593   TxfmSearchInfo *txfm_info = &x->txfm_search_info;
2594   const int is_comp_pred = has_second_ref(mbmi);
2595   const PREDICTION_MODE this_mode = mbmi->mode;
2596 
2597 #if CONFIG_REALTIME_ONLY
2598   const int prune_modes_based_on_tpl = 0;
2599 #else   // CONFIG_REALTIME_ONLY
2600   const TplParams *const tpl_data = &cpi->ppi->tpl_data;
2601   const int prune_modes_based_on_tpl =
2602       cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
2603       av1_tpl_stats_ready(tpl_data, cpi->gf_frame_index);
2604 #endif  // CONFIG_REALTIME_ONLY
2605   int i;
2606   // Reference frames for this mode
2607   const int refs[2] = { mbmi->ref_frame[0],
2608                         (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) };
2609   int rate_mv = 0;
2610   int64_t rd = INT64_MAX;
2611   // Do first prediction into the destination buffer. Do the next
2612   // prediction into a temporary buffer. Then keep track of which one
2613   // of these currently holds the best predictor, and use the other
2614   // one for future predictions. In the end, copy from tmp_buf to
2615   // dst if necessary.
2616   struct macroblockd_plane *pd = xd->plane;
2617   const BUFFER_SET orig_dst = {
2618     { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
2619     { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
2620   };
2621   const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
2622                                  tmp_buf + 2 * MAX_SB_SQUARE },
2623                                { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } };
2624 
2625   int64_t ret_val = INT64_MAX;
2626   const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
2627   RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv;
2628   int64_t best_rd = INT64_MAX;
2629   uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
2630   uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
2631   int64_t best_yrd = INT64_MAX;
2632   MB_MODE_INFO best_mbmi = *mbmi;
2633   int best_xskip_txfm = 0;
2634   int64_t newmv_ret_val = INT64_MAX;
2635   inter_mode_info mode_info[MAX_REF_MV_SEARCH];
2636 
2637   // Do not prune the mode based on inter cost from tpl if the current ref frame
2638   // is the winner ref in neighbouring blocks.
2639   int ref_match_found_in_above_nb = 0;
2640   int ref_match_found_in_left_nb = 0;
2641   if (prune_modes_based_on_tpl) {
2642     ref_match_found_in_above_nb =
2643         find_ref_match_in_above_nbs(cm->mi_params.mi_cols, xd);
2644     ref_match_found_in_left_nb =
2645         find_ref_match_in_left_nbs(cm->mi_params.mi_rows, xd);
2646   }
2647 
2648   // First, perform a simple translation search for each of the indices. If
2649   // an index performs well, it will be fully searched in the main loop
2650   // of this function.
2651   const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
2652   // Save MV results from first 2 ref_mv_idx.
2653   int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
2654   int best_ref_mv_idx = -1;
2655   const int idx_mask =
2656       ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd, bsize, ref_set);
2657   const int16_t mode_ctx =
2658       av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
2659   const ModeCosts *mode_costs = &x->mode_costs;
2660   const int ref_mv_cost = cost_mv_ref(mode_costs, this_mode, mode_ctx);
2661   const int base_rate =
2662       args->ref_frame_cost + args->single_comp_cost + ref_mv_cost;
2663 
2664   for (i = 0; i < MAX_REF_MV_SEARCH - 1; ++i) {
2665     save_mv[i][0].as_int = INVALID_MV;
2666     save_mv[i][1].as_int = INVALID_MV;
2667   }
2668 
2669   // Main loop of this function. This will  iterate over all of the ref mvs
2670   // in the dynamic reference list and do the following:
2671   //    1.) Get the current MV. Create newmv MV if necessary
2672   //    2.) Search compound type and parameters if applicable
2673   //    3.) Do interpolation filter search
2674   //    4.) Build the inter predictor
2675   //    5.) Pick the motion mode (SIMPLE_TRANSLATION, OBMC_CAUSAL,
2676   //        WARPED_CAUSAL)
2677   //    6.) Update stats if best so far
2678   for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
2679     mbmi->ref_mv_idx = ref_mv_idx;
2680 
2681     mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
2682     mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX;
2683     const int drl_cost = get_drl_cost(
2684         mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
2685     mode_info[ref_mv_idx].drl_cost = drl_cost;
2686     mode_info[ref_mv_idx].skip = 0;
2687 
2688     if (!mask_check_bit(idx_mask, ref_mv_idx)) {
2689       // MV did not perform well in simple translation search. Skip it.
2690       continue;
2691     }
2692     if (prune_modes_based_on_tpl && !ref_match_found_in_above_nb &&
2693         !ref_match_found_in_left_nb && (ref_best_rd != INT64_MAX)) {
2694       // Skip mode if TPL model indicates it will not be beneficial.
2695       if (prune_modes_based_on_tpl_stats(
2696               inter_cost_info_from_tpl, refs, ref_mv_idx, this_mode,
2697               cpi->sf.inter_sf.prune_inter_modes_based_on_tpl))
2698         continue;
2699     }
2700     av1_init_rd_stats(rd_stats);
2701 
2702     // Initialize compound mode data
2703     mbmi->interinter_comp.type = COMPOUND_AVERAGE;
2704     mbmi->comp_group_idx = 0;
2705     mbmi->compound_idx = 1;
2706     if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME;
2707 
2708     mbmi->num_proj_ref = 0;
2709     mbmi->motion_mode = SIMPLE_TRANSLATION;
2710 
2711     // Compute cost for signalling this DRL index
2712     rd_stats->rate = base_rate;
2713     rd_stats->rate += drl_cost;
2714 
2715     int rs = 0;
2716     int compmode_interinter_cost = 0;
2717 
2718     int_mv cur_mv[2];
2719 
2720     // TODO(Cherma): Extend this speed feature to support compound mode
2721     int skip_repeated_ref_mv =
2722         is_comp_pred ? 0 : cpi->sf.inter_sf.skip_repeated_ref_mv;
2723     // Generate the current mv according to the prediction mode
2724     if (!build_cur_mv(cur_mv, this_mode, cm, x, skip_repeated_ref_mv)) {
2725       continue;
2726     }
2727 
2728     // The above call to build_cur_mv does not handle NEWMV modes. Build
2729     // the mv here if we have NEWMV for any predictors.
2730     if (have_newmv_in_inter_mode(this_mode)) {
2731 #if CONFIG_COLLECT_COMPONENT_TIMING
2732       start_timing(cpi, handle_newmv_time);
2733 #endif
2734       newmv_ret_val =
2735           handle_newmv(cpi, x, bsize, cur_mv, &rate_mv, args, mode_info);
2736 #if CONFIG_COLLECT_COMPONENT_TIMING
2737       end_timing(cpi, handle_newmv_time);
2738 #endif
2739 
2740       if (newmv_ret_val != 0) continue;
2741 
2742       if (is_inter_singleref_mode(this_mode) &&
2743           cur_mv[0].as_int != INVALID_MV) {
2744         const MV_REFERENCE_FRAME ref = refs[0];
2745         const unsigned int this_sse = x->pred_sse[ref];
2746         if (this_sse < args->best_single_sse_in_refs[ref]) {
2747           args->best_single_sse_in_refs[ref] = this_sse;
2748         }
2749       }
2750 
2751       rd_stats->rate += rate_mv;
2752     }
2753     // Copy the motion vector for this mode into mbmi struct
2754     for (i = 0; i < is_comp_pred + 1; ++i) {
2755       mbmi->mv[i].as_int = cur_mv[i].as_int;
2756     }
2757 
2758     if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd &&
2759         mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) {
2760       continue;
2761     }
2762 
2763     // Skip the rest of the search if prune_ref_mv_idx_search speed feature
2764     // is enabled, and the current MV is similar to a previous one.
2765     if (cpi->sf.inter_sf.prune_ref_mv_idx_search && is_comp_pred &&
2766         prune_ref_mv_idx_search(ref_mv_idx, best_ref_mv_idx, save_mv, mbmi,
2767                                 cpi->sf.inter_sf.prune_ref_mv_idx_search))
2768       continue;
2769 
2770     if (cpi->sf.gm_sf.prune_zero_mv_with_sse &&
2771         cpi->sf.gm_sf.gm_search_type == GM_DISABLE_SEARCH &&
2772         (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
2773       if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args)) {
2774         continue;
2775       }
2776     }
2777 
2778     int skip_build_pred = 0;
2779     const int mi_row = xd->mi_row;
2780     const int mi_col = xd->mi_col;
2781 
2782     // Handle a compound predictor, continue if it is determined this
2783     // cannot be the best compound mode
2784     if (is_comp_pred) {
2785 #if CONFIG_COLLECT_COMPONENT_TIMING
2786       start_timing(cpi, compound_type_rd_time);
2787 #endif
2788       const int not_best_mode = process_compound_inter_mode(
2789           cpi, x, args, ref_best_rd, cur_mv, bsize, &compmode_interinter_cost,
2790           rd_buffers, &orig_dst, &tmp_dst, &rate_mv, rd_stats, skip_rd,
2791           &skip_build_pred);
2792 #if CONFIG_COLLECT_COMPONENT_TIMING
2793       end_timing(cpi, compound_type_rd_time);
2794 #endif
2795       if (not_best_mode) continue;
2796     }
2797 
2798 #if CONFIG_COLLECT_COMPONENT_TIMING
2799     start_timing(cpi, interpolation_filter_search_time);
2800 #endif
2801     // Determine the interpolation filter for this mode
2802     ret_val = av1_interpolation_filter_search(
2803         x, cpi, tile_data, bsize, &tmp_dst, &orig_dst, &rd, &rs,
2804         &skip_build_pred, args, ref_best_rd);
2805 #if CONFIG_COLLECT_COMPONENT_TIMING
2806     end_timing(cpi, interpolation_filter_search_time);
2807 #endif
2808     if (args->modelled_rd != NULL && !is_comp_pred) {
2809       args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd;
2810     }
2811     if (ret_val != 0) {
2812       restore_dst_buf(xd, orig_dst, num_planes);
2813       continue;
2814     } else if (cpi->sf.inter_sf.model_based_post_interp_filter_breakout &&
2815                ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) {
2816       restore_dst_buf(xd, orig_dst, num_planes);
2817       continue;
2818     }
2819 
2820     // Compute modelled RD if enabled
2821     if (args->modelled_rd != NULL) {
2822       if (is_comp_pred) {
2823         const int mode0 = compound_ref0_mode(this_mode);
2824         const int mode1 = compound_ref1_mode(this_mode);
2825         const int64_t mrd =
2826             AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]],
2827                    args->modelled_rd[mode1][ref_mv_idx][refs[1]]);
2828         if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) {
2829           restore_dst_buf(xd, orig_dst, num_planes);
2830           continue;
2831         }
2832       }
2833     }
2834     rd_stats->rate += compmode_interinter_cost;
2835     if (skip_build_pred != 1) {
2836       // Build this inter predictor if it has not been previously built
2837       av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, &orig_dst, bsize, 0,
2838                                     av1_num_planes(cm) - 1);
2839     }
2840 
2841 #if CONFIG_COLLECT_COMPONENT_TIMING
2842     start_timing(cpi, motion_mode_rd_time);
2843 #endif
2844     int rate2_nocoeff = rd_stats->rate;
2845     // Determine the motion mode. This will be one of SIMPLE_TRANSLATION,
2846     // OBMC_CAUSAL or WARPED_CAUSAL
2847     int64_t this_yrd;
2848     ret_val = motion_mode_rd(cpi, tile_data, x, bsize, rd_stats, rd_stats_y,
2849                              rd_stats_uv, args, ref_best_rd, skip_rd, &rate_mv,
2850                              &orig_dst, best_est_rd, do_tx_search,
2851                              inter_modes_info, 0, &this_yrd);
2852 #if CONFIG_COLLECT_COMPONENT_TIMING
2853     end_timing(cpi, motion_mode_rd_time);
2854 #endif
2855     assert(
2856         IMPLIES(!av1_check_newmv_joint_nonzero(cm, x), ret_val == INT64_MAX));
2857 
2858     if (ret_val != INT64_MAX) {
2859       int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
2860       const THR_MODES mode_enum = get_prediction_mode_idx(
2861           mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
2862       // Collect mode stats for multiwinner mode processing
2863       store_winner_mode_stats(&cpi->common, x, mbmi, rd_stats, rd_stats_y,
2864                               rd_stats_uv, mode_enum, NULL, bsize, tmp_rd,
2865                               cpi->sf.winner_mode_sf.multi_winner_mode_type,
2866                               do_tx_search);
2867       if (tmp_rd < best_rd) {
2868         best_yrd = this_yrd;
2869         // Update the best rd stats if we found the best mode so far
2870         best_rd_stats = *rd_stats;
2871         best_rd_stats_y = *rd_stats_y;
2872         best_rd_stats_uv = *rd_stats_uv;
2873         best_rd = tmp_rd;
2874         best_mbmi = *mbmi;
2875         best_xskip_txfm = txfm_info->skip_txfm;
2876         memcpy(best_blk_skip, txfm_info->blk_skip,
2877                sizeof(best_blk_skip[0]) * xd->height * xd->width);
2878         av1_copy_array(best_tx_type_map, xd->tx_type_map,
2879                        xd->height * xd->width);
2880         motion_mode_cand->rate_mv = rate_mv;
2881         motion_mode_cand->rate2_nocoeff = rate2_nocoeff;
2882       }
2883 
2884       if (tmp_rd < ref_best_rd) {
2885         ref_best_rd = tmp_rd;
2886         best_ref_mv_idx = ref_mv_idx;
2887       }
2888     }
2889     restore_dst_buf(xd, orig_dst, num_planes);
2890   }
2891 
2892   if (best_rd == INT64_MAX) return INT64_MAX;
2893 
2894   // re-instate status of the best choice
2895   *rd_stats = best_rd_stats;
2896   *rd_stats_y = best_rd_stats_y;
2897   *rd_stats_uv = best_rd_stats_uv;
2898   *yrd = best_yrd;
2899   *mbmi = best_mbmi;
2900   txfm_info->skip_txfm = best_xskip_txfm;
2901   assert(IMPLIES(mbmi->comp_group_idx == 1,
2902                  mbmi->interinter_comp.type != COMPOUND_AVERAGE));
2903   memcpy(txfm_info->blk_skip, best_blk_skip,
2904          sizeof(best_blk_skip[0]) * xd->height * xd->width);
2905   av1_copy_array(xd->tx_type_map, best_tx_type_map, xd->height * xd->width);
2906 
2907   rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
2908 
2909   return rd_stats->rdcost;
2910 }
2911 
2912 /*!\brief Search for the best intrabc predictor
2913  *
2914  * \ingroup intra_mode_search
2915  * \callergraph
2916  * This function performs a motion search to find the best intrabc predictor.
2917  *
2918  * \returns Returns the best overall rdcost (including the non-intrabc modes
2919  * search before this function).
2920  */
static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
                                       PICK_MODE_CONTEXT *ctx,
                                       RD_STATS *rd_stats, BLOCK_SIZE bsize,
                                       int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  // Bail out unless intra block copy is allowed for this frame, enabled in
  // the keyframe config, and we are on the RD (non-realtime) pick-mode path.
  if (!av1_allow_intrabc(cm) || !cpi->oxcf.kf_cfg.enable_intrabc ||
      cpi->sf.rt_sf.use_nonrd_pick_mode)
    return INT64_MAX;
  const int num_planes = av1_num_planes(cm);

  MACROBLOCKD *const xd = &x->e_mbd;
  const TileInfo *tile = &xd->tile;
  MB_MODE_INFO *mbmi = xd->mi[0];
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  const int w = block_size_wide[bsize];
  const int h = block_size_high[bsize];
  // Superblock coordinates of the current block, used to bound the search to
  // already-coded superblock rows/columns.
  const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
  const int sb_col = mi_col >> cm->seq_params->mib_size_log2;

  // Gather MV references for INTRA_FRAME to derive a displacement-vector
  // (DV) predictor for the intrabc search.
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
  av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                   xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                   mbmi_ext->mode_context);
  // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
  // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
  av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
  int_mv nearestmv, nearmv;
  av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv,
                                   0);

  // Treat invalid candidates as the zero vector.
  if (nearestmv.as_int == INVALID_MV) {
    nearestmv.as_int = 0;
  }
  if (nearmv.as_int == INVALID_MV) {
    nearmv.as_int = 0;
  }

  // Prefer the NEAREST candidate as the DV reference; fall back to NEAR, and
  // finally to a default reference DV when both are zero.
  int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
  if (dv_ref.as_int == 0) {
    av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
  }
  // Ref DV should not have sub-pel.
  assert((dv_ref.as_mv.col & 7) == 0);
  assert((dv_ref.as_mv.row & 7) == 0);
  mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref;

  // IntraBC predicts from the current (partially reconstructed) frame, so
  // point the prediction buffers at cur_buf.
  struct buf_2d yv12_mb[MAX_MB_PLANE];
  av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, NULL, NULL, num_planes);
  for (int i = 0; i < num_planes; ++i) {
    xd->plane[i].pre[0] = yv12_mb[i];
  }

  // Two search regions are tried: the rows above the current superblock, and
  // the already-coded area to the left of it.
  enum IntrabcMotionDirection {
    IBC_MOTION_ABOVE,
    IBC_MOTION_LEFT,
    IBC_MOTION_DIRECTIONS
  };

  // Snapshot current state so losing candidates can be discarded.
  MB_MODE_INFO best_mbmi = *mbmi;
  RD_STATS best_rdstats = *rd_stats;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 };
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  av1_copy_array(best_tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);

  FULLPEL_MOTION_SEARCH_PARAMS fullms_params;
  const search_site_config *lookahead_search_sites =
      cpi->mv_search_params.search_site_cfg[SS_CFG_LOOKAHEAD];
  av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
                                     &dv_ref.as_mv, lookahead_search_sites,
                                     /*fine_search_interval=*/0);
  const IntraBCMVCosts *const dv_costs = x->dv_costs;
  av1_set_ms_to_intra_mode(&fullms_params, dv_costs);

  for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
       dir < IBC_MOTION_DIRECTIONS; ++dir) {
    // Constrain the full-pel search range to the already-coded area for this
    // direction (tile boundaries plus superblock rows/cols coded so far).
    switch (dir) {
      case IBC_MOTION_ABOVE:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (tile->mi_col_end - mi_col) * MI_SIZE - w;
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        fullms_params.mv_limits.row_max =
            (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
        break;
      case IBC_MOTION_LEFT:
        fullms_params.mv_limits.col_min =
            (tile->mi_col_start - mi_col) * MI_SIZE;
        fullms_params.mv_limits.col_max =
            (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
        // TODO(aconverse@google.com): Minimize the overlap between above and
        // left areas.
        fullms_params.mv_limits.row_min =
            (tile->mi_row_start - mi_row) * MI_SIZE;
        int bottom_coded_mi_edge =
            AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
        fullms_params.mv_limits.row_max =
            (bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
        break;
      default: assert(0);
    }
    // NOTE(review): these four asserts compare each limit against itself, so
    // they are always true and verify nothing. Presumably they were meant to
    // validate the intrabc limits against another set of limits (e.g.
    // x->mv_limits) -- TODO confirm intended comparison.
    assert(fullms_params.mv_limits.col_min >= fullms_params.mv_limits.col_min);
    assert(fullms_params.mv_limits.col_max <= fullms_params.mv_limits.col_max);
    assert(fullms_params.mv_limits.row_min >= fullms_params.mv_limits.row_min);
    assert(fullms_params.mv_limits.row_max <= fullms_params.mv_limits.row_max);

    av1_set_mv_search_range(&fullms_params.mv_limits, &dv_ref.as_mv);

    // Skip this direction entirely if the clamped search window is empty.
    if (fullms_params.mv_limits.col_max < fullms_params.mv_limits.col_min ||
        fullms_params.mv_limits.row_max < fullms_params.mv_limits.row_min) {
      continue;
    }

    const int step_param = cpi->mv_search_params.mv_step_param;
    const FULLPEL_MV start_mv = get_fullmv_from_mv(&dv_ref.as_mv);
    IntraBCHashInfo *intrabc_hash_info = &x->intrabc_hash_info;
    int_mv best_mv, best_hash_mv;

    // Regular full-pel search, then a hash-based search; keep whichever has
    // the lower error.
    int bestsme = av1_full_pixel_search(start_mv, &fullms_params, step_param,
                                        NULL, &best_mv.as_fullmv, NULL);
    const int hashsme = av1_intrabc_hash_search(
        cpi, xd, &fullms_params, intrabc_hash_info, &best_hash_mv.as_fullmv);
    if (hashsme < bestsme) {
      best_mv = best_hash_mv;
      bestsme = hashsme;
    }

    if (bestsme == INT_MAX) continue;
    const MV dv = get_mv_from_fullmv(&best_mv.as_fullmv);
    // Reject DVs that fall outside the search limits or violate the intrabc
    // validity constraints.
    if (!av1_is_fullmv_in_range(&fullms_params.mv_limits,
                                get_fullmv_from_mv(&dv)))
      continue;
    if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
                         cm->seq_params->mib_size_log2))
      continue;

    // DV should not have sub-pel.
    assert((dv.col & 7) == 0);
    assert((dv.row & 7) == 0);
    // Configure mbmi as an intrabc block and build its prediction.
    memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info));
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
    mbmi->use_intrabc = 1;
    mbmi->mode = DC_PRED;
    mbmi->uv_mode = UV_DC_PRED;
    mbmi->motion_mode = SIMPLE_TRANSLATION;
    mbmi->mv[0].as_mv = dv;
    mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
    mbmi->skip_txfm = 0;
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                  av1_num_planes(cm) - 1);

    // TODO(aconverse@google.com): The full motion field defining discount
    // in MV_COST_WEIGHT is too large. Explore other values.
    const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
                                        dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
    const int rate_mode = x->mode_costs.intrabc_cost[1];
    RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
    // Full transform search for this candidate; skip it on failure.
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
                         &rd_stats_uv, rate_mode + rate_mv, INT64_MAX))
      continue;
    rd_stats_yuv.rdcost =
        RDCOST(x->rdmult, rd_stats_yuv.rate, rd_stats_yuv.dist);
    // Keep this candidate if it beats the best RD cost seen so far.
    if (rd_stats_yuv.rdcost < best_rd) {
      best_rd = rd_stats_yuv.rdcost;
      best_mbmi = *mbmi;
      best_rdstats = rd_stats_yuv;
      memcpy(best_blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
      av1_copy_array(best_tx_type_map, xd->tx_type_map, xd->height * xd->width);
    }
  }
  // Restore the overall winner (which may be the original input state).
  *mbmi = best_mbmi;
  *rd_stats = best_rdstats;
  memcpy(txfm_info->blk_skip, best_blk_skip,
         sizeof(txfm_info->blk_skip[0]) * xd->height * xd->width);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, ctx->num_4x4_blk);
#if CONFIG_RD_DEBUG
  mbmi->rd_stats = *rd_stats;
#endif
  return best_rd;
}
3107 
// TODO(chiyotsai@google.com): We are using "struct AV1_COMP" and
// "struct macroblock" here instead of their typedefs because Doxygen doesn't
// know about the typedefs yet. So using the typedefs would prevent doxygen
// from finding this function and generating the callgraph. Once documents for
// AV1_COMP and MACROBLOCK are added to doxygen, we can revert back to using
// the typedefs.
void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x,
                               struct RD_STATS *rd_cost, BLOCK_SIZE bsize,
                               PICK_MODE_CONTEXT *ctx, int64_t best_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const int num_planes = av1_num_planes(cm);
  int y_rate = 0, uv_rate = 0, y_rate_tokenonly = 0, uv_rate_tokenonly = 0;
  int y_skip_txfm = 0, uv_skip_txfm = 0;
  int64_t y_dist = 0, uv_dist = 0;

  // Start from a clean intra configuration: intra reference, zero MV, no
  // intrabc, no skip mode.
  ctx->rd_stats.skip_txfm = 0;
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  mbmi->use_intrabc = 0;
  mbmi->mv[0].as_int = 0;
  mbmi->skip_mode = 0;

  // Search the luma intra modes first.
  const int64_t intra_yrd = av1_rd_pick_intra_sby_mode(
      cpi, x, &y_rate, &y_rate_tokenonly, &y_dist, &y_skip_txfm, bsize, best_rd,
      ctx);

  // Restore default mode evaluation params after the luma search.
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);

  if (intra_yrd >= best_rd) {
    // Luma search could not beat the current best; mark the result invalid.
    rd_cost->rate = INT_MAX;
  } else {
    // Search chroma intra modes when the frame has chroma planes.
    if (num_planes > 1) {
      // Restore the luma tx data so the y prediction can be reproduced if
      // chroma-from-luma needs it.
      if (xd->is_chroma_ref && store_cfl_required_rdo(cm, x)) {
        memcpy(txfm_info->blk_skip, ctx->blk_skip,
               sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
        av1_copy_array(xd->tx_type_map, ctx->tx_type_map, ctx->num_4x4_blk);
      }
      const TX_SIZE max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
      av1_rd_pick_intra_sbuv_mode(cpi, x, &uv_rate, &uv_rate_tokenonly,
                                  &uv_dist, &uv_skip_txfm, bsize,
                                  max_uv_tx_size);
    }

    // An intra block is always coded as non-skip.
    rd_cost->rate =
        y_rate + uv_rate +
        x->mode_costs.skip_txfm_cost[av1_get_skip_txfm_context(xd)][0];
    rd_cost->dist = y_dist + uv_dist;
    rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
    rd_cost->skip_txfm = 0;
  }

  // Give intrabc a chance to beat the regular intra result.
  if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd)
    best_rd = rd_cost->rdcost;
  if (rd_pick_intrabc_mode_sb(cpi, x, ctx, rd_cost, bsize, best_rd) < best_rd) {
    ctx->rd_stats.skip_txfm = mbmi->skip_txfm;
    memcpy(ctx->blk_skip, txfm_info->blk_skip,
           sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
    assert(rd_cost->rate != INT_MAX);
  }
  if (rd_cost->rate == INT_MAX) return;

  // Record the winning mode info into the pick-mode context.
  ctx->mic = *xd->mi[0];
  av1_copy_mbmi_ext_to_mbmi_ext_frame(&ctx->mbmi_ext_best, &x->mbmi_ext,
                                      av1_ref_frame_type(xd->mi[0]->ref_frame));
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
3181 
3182 static AOM_INLINE void calc_target_weighted_pred(
3183     const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
3184     const uint8_t *above, int above_stride, const uint8_t *left,
3185     int left_stride);
3186 
rd_pick_skip_mode(RD_STATS * rd_cost,InterModeSearchState * search_state,const AV1_COMP * const cpi,MACROBLOCK * const x,BLOCK_SIZE bsize,struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE])3187 static AOM_INLINE void rd_pick_skip_mode(
3188     RD_STATS *rd_cost, InterModeSearchState *search_state,
3189     const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
3190     struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) {
3191   const AV1_COMMON *const cm = &cpi->common;
3192   const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
3193   const int num_planes = av1_num_planes(cm);
3194   MACROBLOCKD *const xd = &x->e_mbd;
3195   MB_MODE_INFO *const mbmi = xd->mi[0];
3196 
3197   x->compound_idx = 1;  // COMPOUND_AVERAGE
3198   RD_STATS skip_mode_rd_stats;
3199   av1_invalid_rd_stats(&skip_mode_rd_stats);
3200 
3201   if (skip_mode_info->ref_frame_idx_0 == INVALID_IDX ||
3202       skip_mode_info->ref_frame_idx_1 == INVALID_IDX) {
3203     return;
3204   }
3205 
3206   const MV_REFERENCE_FRAME ref_frame =
3207       LAST_FRAME + skip_mode_info->ref_frame_idx_0;
3208   const MV_REFERENCE_FRAME second_ref_frame =
3209       LAST_FRAME + skip_mode_info->ref_frame_idx_1;
3210   const PREDICTION_MODE this_mode = NEAREST_NEARESTMV;
3211   const THR_MODES mode_index =
3212       get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame);
3213 
3214   if (mode_index == THR_INVALID) {
3215     return;
3216   }
3217 
3218   if ((!cpi->oxcf.ref_frm_cfg.enable_onesided_comp ||
3219        cpi->sf.inter_sf.disable_onesided_comp) &&
3220       cpi->all_one_sided_refs) {
3221     return;
3222   }
3223 
3224   mbmi->mode = this_mode;
3225   mbmi->uv_mode = UV_DC_PRED;
3226   mbmi->ref_frame[0] = ref_frame;
3227   mbmi->ref_frame[1] = second_ref_frame;
3228   const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
3229   if (x->mbmi_ext.ref_mv_count[ref_frame_type] == UINT8_MAX) {
3230     MB_MODE_INFO_EXT *mbmi_ext = &x->mbmi_ext;
3231     if (mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX ||
3232         mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) {
3233       return;
3234     }
3235     av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count,
3236                      xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
3237                      mbmi_ext->mode_context);
3238     // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
3239     // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
3240     av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame_type);
3241   }
3242 
3243   assert(this_mode == NEAREST_NEARESTMV);
3244   if (!build_cur_mv(mbmi->mv, this_mode, cm, x, 0)) {
3245     return;
3246   }
3247 
3248   mbmi->filter_intra_mode_info.use_filter_intra = 0;
3249   mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
3250   mbmi->comp_group_idx = 0;
3251   mbmi->compound_idx = x->compound_idx;
3252   mbmi->interinter_comp.type = COMPOUND_AVERAGE;
3253   mbmi->motion_mode = SIMPLE_TRANSLATION;
3254   mbmi->ref_mv_idx = 0;
3255   mbmi->skip_mode = mbmi->skip_txfm = 1;
3256   mbmi->palette_mode_info.palette_size[0] = 0;
3257   mbmi->palette_mode_info.palette_size[1] = 0;
3258 
3259   set_default_interp_filters(mbmi, cm->features.interp_filter);
3260 
3261   set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
3262   for (int i = 0; i < num_planes; i++) {
3263     xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
3264     xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
3265   }
3266 
3267   BUFFER_SET orig_dst;
3268   for (int i = 0; i < num_planes; i++) {
3269     orig_dst.plane[i] = xd->plane[i].dst.buf;
3270     orig_dst.stride[i] = xd->plane[i].dst.stride;
3271   }
3272 
3273   // Obtain the rdcost for skip_mode.
3274   skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, &orig_dst);
3275 
3276   // Compare the use of skip_mode with the best intra/inter mode obtained.
3277   const int skip_mode_ctx = av1_get_skip_mode_context(xd);
3278   int64_t best_intra_inter_mode_cost = INT64_MAX;
3279   if (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) {
3280     const ModeCosts *mode_costs = &x->mode_costs;
3281     best_intra_inter_mode_cost = RDCOST(
3282         x->rdmult, rd_cost->rate + mode_costs->skip_mode_cost[skip_mode_ctx][0],
3283         rd_cost->dist);
3284     // Account for non-skip mode rate in total rd stats
3285     rd_cost->rate += mode_costs->skip_mode_cost[skip_mode_ctx][0];
3286     av1_rd_cost_update(x->rdmult, rd_cost);
3287   }
3288 
3289   if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost &&
3290       (!xd->lossless[mbmi->segment_id] || skip_mode_rd_stats.dist == 0)) {
3291     assert(mode_index != THR_INVALID);
3292     search_state->best_mbmode.skip_mode = 1;
3293     search_state->best_mbmode = *mbmi;
3294     memset(search_state->best_mbmode.inter_tx_size,
3295            search_state->best_mbmode.tx_size,
3296            sizeof(search_state->best_mbmode.inter_tx_size));
3297     set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
3298                   search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
3299                   xd);
3300     search_state->best_mode_index = mode_index;
3301 
3302     // Update rd_cost
3303     rd_cost->rate = skip_mode_rd_stats.rate;
3304     rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist;
3305     rd_cost->rdcost = skip_mode_rd_stats.rdcost;
3306 
3307     search_state->best_rd = rd_cost->rdcost;
3308     search_state->best_skip2 = 1;
3309     search_state->best_mode_skippable = 1;
3310 
3311     x->txfm_search_info.skip_txfm = 1;
3312   }
3313 }
3314 
3315 // Get winner mode stats of given mode index
get_winner_mode_stats(MACROBLOCK * x,MB_MODE_INFO * best_mbmode,RD_STATS * best_rd_cost,int best_rate_y,int best_rate_uv,THR_MODES * best_mode_index,RD_STATS ** winner_rd_cost,int * winner_rate_y,int * winner_rate_uv,THR_MODES * winner_mode_index,MULTI_WINNER_MODE_TYPE multi_winner_mode_type,int mode_idx)3316 static AOM_INLINE MB_MODE_INFO *get_winner_mode_stats(
3317     MACROBLOCK *x, MB_MODE_INFO *best_mbmode, RD_STATS *best_rd_cost,
3318     int best_rate_y, int best_rate_uv, THR_MODES *best_mode_index,
3319     RD_STATS **winner_rd_cost, int *winner_rate_y, int *winner_rate_uv,
3320     THR_MODES *winner_mode_index, MULTI_WINNER_MODE_TYPE multi_winner_mode_type,
3321     int mode_idx) {
3322   MB_MODE_INFO *winner_mbmi;
3323   if (multi_winner_mode_type) {
3324     assert(mode_idx >= 0 && mode_idx < x->winner_mode_count);
3325     WinnerModeStats *winner_mode_stat = &x->winner_mode_stats[mode_idx];
3326     winner_mbmi = &winner_mode_stat->mbmi;
3327 
3328     *winner_rd_cost = &winner_mode_stat->rd_cost;
3329     *winner_rate_y = winner_mode_stat->rate_y;
3330     *winner_rate_uv = winner_mode_stat->rate_uv;
3331     *winner_mode_index = winner_mode_stat->mode_index;
3332   } else {
3333     winner_mbmi = best_mbmode;
3334     *winner_rd_cost = best_rd_cost;
3335     *winner_rate_y = best_rate_y;
3336     *winner_rate_uv = best_rate_uv;
3337     *winner_mode_index = *best_mode_index;
3338   }
3339   return winner_mbmi;
3340 }
3341 
// speed feature: fast intra/inter transform type search
// Used for speed >= 2
// When this speed feature is on, in rd mode search, only DCT is used.
// After the mode is determined, this function is called, to select
// transform types and get accurate rdcost.
static AOM_INLINE void refine_winner_mode_tx(
    const AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost, BLOCK_SIZE bsize,
    PICK_MODE_CONTEXT *ctx, THR_MODES *best_mode_index,
    MB_MODE_INFO *best_mbmode, struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    int best_rate_y, int best_rate_uv, int *best_skip2, int winner_mode_count) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  TxfmSearchParams *txfm_params = &x->txfm_search_params;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int64_t best_rd;
  const int num_planes = av1_num_planes(cm);

  if (!is_winner_mode_processing_enabled(cpi, best_mbmode, best_mbmode->mode))
    return;

  // Set params for winner mode evaluation
  set_mode_eval_params(cpi, x, WINNER_MODE_EVAL);

  // No best mode identified so far
  if (*best_mode_index == THR_INVALID) return;

  best_rd = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist);
  for (int mode_idx = 0; mode_idx < winner_mode_count; mode_idx++) {
    RD_STATS *winner_rd_stats = NULL;
    int winner_rate_y = 0, winner_rate_uv = 0;
    THR_MODES winner_mode_index = 0;

    // TODO(any): Combine best mode and multi-winner mode processing paths
    // Get winner mode stats for current mode index
    MB_MODE_INFO *winner_mbmi = get_winner_mode_stats(
        x, best_mbmode, rd_cost, best_rate_y, best_rate_uv, best_mode_index,
        &winner_rd_stats, &winner_rate_y, &winner_rate_uv, &winner_mode_index,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, mode_idx);

    // Refinement only applies to lossy blocks with a valid winner mode.
    if (xd->lossless[winner_mbmi->segment_id] == 0 &&
        winner_mode_index != THR_INVALID &&
        is_winner_mode_processing_enabled(cpi, winner_mbmi,
                                          winner_mbmi->mode)) {
      RD_STATS rd_stats = *winner_rd_stats;
      int skip_blk = 0;
      RD_STATS rd_stats_y, rd_stats_uv;
      const int skip_ctx = av1_get_skip_txfm_context(xd);

      // Load the winner mode into the current macroblock context so the
      // transform search below evaluates it.
      *mbmi = *winner_mbmi;

      set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

      // Select prediction reference frames.
      for (int i = 0; i < num_planes; i++) {
        xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
        if (has_second_ref(mbmi))
          xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
      }

      if (is_inter_mode(mbmi->mode)) {
        // Rebuild the inter prediction (including OBMC when used) before
        // re-searching the transform configuration.
        const int mi_row = xd->mi_row;
        const int mi_col = xd->mi_col;
        av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                      av1_num_planes(cm) - 1);
        if (mbmi->motion_mode == OBMC_CAUSAL)
          av1_build_obmc_inter_predictors_sb(cm, xd);

        av1_subtract_plane(x, bsize, 0);
        if (txfm_params->tx_mode_search_type == TX_MODE_SELECT &&
            !xd->lossless[mbmi->segment_id]) {
          av1_pick_recursive_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                              INT64_MAX);
          assert(rd_stats_y.rate != INT_MAX);
        } else {
          // Uniform transform size: replicate the chosen size and skip flag
          // across the whole block.
          av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                            INT64_MAX);
          memset(mbmi->inter_tx_size, mbmi->tx_size,
                 sizeof(mbmi->inter_tx_size));
          for (int i = 0; i < xd->height * xd->width; ++i)
            set_blk_skip(txfm_info->blk_skip, 0, i, rd_stats_y.skip_txfm);
        }
      } else {
        av1_pick_uniform_tx_size_type_yrd(cpi, x, &rd_stats_y, bsize,
                                          INT64_MAX);
      }

      if (num_planes > 1) {
        av1_txfm_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX);
      } else {
        av1_init_rd_stats(&rd_stats_uv);
      }

      // Decide between coding the residue and signalling skip_txfm by
      // comparing the RD cost of the two alternatives (inter modes only).
      const ModeCosts *mode_costs = &x->mode_costs;
      if (is_inter_mode(mbmi->mode) &&
          RDCOST(x->rdmult,
                 mode_costs->skip_txfm_cost[skip_ctx][0] + rd_stats_y.rate +
                     rd_stats_uv.rate,
                 (rd_stats_y.dist + rd_stats_uv.dist)) >
              RDCOST(x->rdmult, mode_costs->skip_txfm_cost[skip_ctx][1],
                     (rd_stats_y.sse + rd_stats_uv.sse))) {
        skip_blk = 1;
        rd_stats_y.rate = mode_costs->skip_txfm_cost[skip_ctx][1];
        rd_stats_uv.rate = 0;
        rd_stats_y.dist = rd_stats_y.sse;
        rd_stats_uv.dist = rd_stats_uv.sse;
      } else {
        skip_blk = 0;
        rd_stats_y.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
      }
      // Total rate: replace the previously-estimated luma/chroma rates with
      // the refined ones computed above.
      int this_rate = rd_stats.rate + rd_stats_y.rate + rd_stats_uv.rate -
                      winner_rate_y - winner_rate_uv;
      int64_t this_rd =
          RDCOST(x->rdmult, this_rate, (rd_stats_y.dist + rd_stats_uv.dist));
      if (best_rd > this_rd) {
        // The refined transform choice beats the current best; record it.
        *best_mbmode = *mbmi;
        *best_mode_index = winner_mode_index;
        av1_copy_array(ctx->blk_skip, txfm_info->blk_skip, ctx->num_4x4_blk);
        av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
        rd_cost->rate = this_rate;
        rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist;
        rd_cost->sse = rd_stats_y.sse + rd_stats_uv.sse;
        rd_cost->rdcost = this_rd;
        best_rd = this_rd;
        *best_skip2 = skip_blk;
      }
    }
  }
}
3471 
/*!\cond */
// Collection of masks used to prune (prediction mode, reference frame)
// combinations from the inter-mode RD search.
typedef struct {
  // Mask for each reference frame, specifying which prediction modes to NOT try
  // during search.
  uint32_t pred_modes[REF_FRAMES];
  // If ref_combo[i][j + 1] is true, do NOT try prediction using combination of
  // reference frames (i, j).
  // Note: indexing with 'j + 1' is due to the fact that 2nd reference can be -1
  // (NONE_FRAME).
  bool ref_combo[REF_FRAMES][REF_FRAMES + 1];
} mode_skip_mask_t;
/*!\endcond */
3484 
3485 // Update 'ref_combo' mask to disable given 'ref' in single and compound modes.
disable_reference(MV_REFERENCE_FRAME ref,bool ref_combo[REF_FRAMES][REF_FRAMES+1])3486 static AOM_INLINE void disable_reference(
3487     MV_REFERENCE_FRAME ref, bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3488   for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3489     ref_combo[ref][ref2 + 1] = true;
3490   }
3491 }
3492 
3493 // Update 'ref_combo' mask to disable all inter references except ALTREF.
disable_inter_references_except_altref(bool ref_combo[REF_FRAMES][REF_FRAMES+1])3494 static AOM_INLINE void disable_inter_references_except_altref(
3495     bool ref_combo[REF_FRAMES][REF_FRAMES + 1]) {
3496   disable_reference(LAST_FRAME, ref_combo);
3497   disable_reference(LAST2_FRAME, ref_combo);
3498   disable_reference(LAST3_FRAME, ref_combo);
3499   disable_reference(GOLDEN_FRAME, ref_combo);
3500   disable_reference(BWDREF_FRAME, ref_combo);
3501   disable_reference(ALTREF2_FRAME, ref_combo);
3502 }
3503 
// Reference-frame pairs tried when the reduced reference set is enabled.
// Entries of the form { ref, NONE_FRAME } denote single-reference prediction.
static const MV_REFERENCE_FRAME reduced_ref_combos[][2] = {
  { LAST_FRAME, NONE_FRAME },     { ALTREF_FRAME, NONE_FRAME },
  { LAST_FRAME, ALTREF_FRAME },   { GOLDEN_FRAME, NONE_FRAME },
  { INTRA_FRAME, NONE_FRAME },    { GOLDEN_FRAME, ALTREF_FRAME },
  { LAST_FRAME, GOLDEN_FRAME },   { LAST_FRAME, INTRA_FRAME },
  { LAST_FRAME, BWDREF_FRAME },   { LAST_FRAME, LAST3_FRAME },
  { GOLDEN_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, INTRA_FRAME },
  { BWDREF_FRAME, NONE_FRAME },   { BWDREF_FRAME, ALTREF_FRAME },
  { ALTREF_FRAME, INTRA_FRAME },  { BWDREF_FRAME, INTRA_FRAME },
};

// Minimal reference set used for real-time encoding: single-reference (or
// intra) predictions only — no compound pairs.
static const MV_REFERENCE_FRAME real_time_ref_combos[][2] = {
  { LAST_FRAME, NONE_FRAME },
  { ALTREF_FRAME, NONE_FRAME },
  { GOLDEN_FRAME, NONE_FRAME },
  { INTRA_FRAME, NONE_FRAME }
};

// Which collection of reference-frame combinations the mode search may use.
typedef enum { REF_SET_FULL, REF_SET_REDUCED, REF_SET_REALTIME } REF_SET;
3523 
default_skip_mask(mode_skip_mask_t * mask,REF_SET ref_set)3524 static AOM_INLINE void default_skip_mask(mode_skip_mask_t *mask,
3525                                          REF_SET ref_set) {
3526   if (ref_set == REF_SET_FULL) {
3527     // Everything available by default.
3528     memset(mask, 0, sizeof(*mask));
3529   } else {
3530     // All modes available by default.
3531     memset(mask->pred_modes, 0, sizeof(mask->pred_modes));
3532     // All references disabled first.
3533     for (MV_REFERENCE_FRAME ref1 = INTRA_FRAME; ref1 < REF_FRAMES; ++ref1) {
3534       for (MV_REFERENCE_FRAME ref2 = NONE_FRAME; ref2 < REF_FRAMES; ++ref2) {
3535         mask->ref_combo[ref1][ref2 + 1] = true;
3536       }
3537     }
3538     const MV_REFERENCE_FRAME(*ref_set_combos)[2];
3539     int num_ref_combos;
3540 
3541     // Then enable reduced set of references explicitly.
3542     switch (ref_set) {
3543       case REF_SET_REDUCED:
3544         ref_set_combos = reduced_ref_combos;
3545         num_ref_combos =
3546             (int)sizeof(reduced_ref_combos) / sizeof(reduced_ref_combos[0]);
3547         break;
3548       case REF_SET_REALTIME:
3549         ref_set_combos = real_time_ref_combos;
3550         num_ref_combos =
3551             (int)sizeof(real_time_ref_combos) / sizeof(real_time_ref_combos[0]);
3552         break;
3553       default: assert(0); num_ref_combos = 0;
3554     }
3555 
3556     for (int i = 0; i < num_ref_combos; ++i) {
3557       const MV_REFERENCE_FRAME *const this_combo = ref_set_combos[i];
3558       mask->ref_combo[this_combo[0]][this_combo[1] + 1] = false;
3559     }
3560   }
3561 }
3562 
// Populate 'mask' with the (mode, reference) combinations that should be
// skipped for this block, based on speed features, segmentation, frame type
// and the quality (SAD) of each reference's predicted MV.
static AOM_INLINE void init_mode_skip_mask(mode_skip_mask_t *mask,
                                           const AV1_COMP *cpi, MACROBLOCK *x,
                                           BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const SPEED_FEATURES *const sf = &cpi->sf;
  REF_SET ref_set = REF_SET_FULL;

  // Pick the reference set; real-time takes precedence over reduced.
  if (sf->rt_sf.use_real_time_ref_set)
    ref_set = REF_SET_REALTIME;
  else if (cpi->oxcf.ref_frm_cfg.enable_reduced_reference_set)
    ref_set = REF_SET_REDUCED;

  default_skip_mask(mask, ref_set);

  int min_pred_mv_sad = INT_MAX;
  MV_REFERENCE_FRAME ref_frame;
  if (ref_set == REF_SET_REALTIME) {
    // For real-time encoding, we only look at a subset of ref frames. So the
    // threshold for pruning should be computed from this subset as well.
    const int num_rt_refs =
        sizeof(real_time_ref_combos) / sizeof(*real_time_ref_combos);
    for (int r_idx = 0; r_idx < num_rt_refs; r_idx++) {
      const MV_REFERENCE_FRAME ref = real_time_ref_combos[r_idx][0];
      if (ref != INTRA_FRAME) {
        min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref]);
      }
    }
  } else {
    for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame)
      min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]);
  }

  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame])) {
      // Skip checking missing reference in both single and compound reference
      // modes.
      disable_reference(ref_frame, mask->ref_combo);
    } else {
      // Skip fixed mv modes for poor references
      if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) {
        mask->pred_modes[ref_frame] |= INTER_NEAREST_NEAR_ZERO;
      }
    }
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) {
      // Reference not used for the segment.
      disable_reference(ref_frame, mask->ref_combo);
    }
  }
  // Note: We use the following drop-out only if the SEG_LVL_REF_FRAME feature
  // is disabled for this segment. This is to prevent the possibility that we
  // end up unable to pick any mode.
  if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
    // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame,
    // unless ARNR filtering is enabled in which case we want
    // an unfiltered alternative. We allow near/nearest as well
    // because they may result in zero-zero MVs but be cheaper.
    if (cpi->rc.is_src_frame_alt_ref &&
        (cpi->oxcf.algo_cfg.arnr_max_frames == 0)) {
      disable_inter_references_except_altref(mask->ref_combo);

      mask->pred_modes[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
      const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME };
      int_mv near_mv, nearest_mv, global_mv;
      get_this_mv(&nearest_mv, NEARESTMV, 0, 0, 0, tmp_ref_frames,
                  &x->mbmi_ext);
      get_this_mv(&near_mv, NEARMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);
      get_this_mv(&global_mv, GLOBALMV, 0, 0, 0, tmp_ref_frames, &x->mbmi_ext);

      // NEAR/NEAREST stay enabled only if they coincide with the global MV;
      // otherwise they are masked out as well.
      if (near_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARMV);
      if (nearest_mv.as_int != global_mv.as_int)
        mask->pred_modes[ALTREF_FRAME] |= (1 << NEARESTMV);
    }
  }

  if (cpi->rc.is_src_frame_alt_ref) {
    if (sf->inter_sf.alt_ref_search_fp &&
        (cpi->ref_frame_flags & av1_ref_frame_flag_list[ALTREF_FRAME])) {
      // Fast path: search ALTREF only (all its modes), no intra.
      mask->pred_modes[ALTREF_FRAME] = 0;
      disable_inter_references_except_altref(mask->ref_combo);
      disable_reference(INTRA_FRAME, mask->ref_combo);
    }
  }

  if (sf->inter_sf.alt_ref_search_fp) {
    if (!cm->show_frame && x->best_pred_mv_sad < INT_MAX) {
      // Threshold is best SAD plus 12.5%.
      int sad_thresh = x->best_pred_mv_sad + (x->best_pred_mv_sad >> 3);
      // Conservatively skip the modes w.r.t. BWDREF, ALTREF2 and ALTREF, if
      // those are past frames
      for (ref_frame = BWDREF_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
        if (cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] <
            0)
          if (x->pred_mv_sad[ref_frame] > sad_thresh)
            mask->pred_modes[ref_frame] |= INTER_ALL;
      }
    }
  }

  if (bsize > sf->part_sf.max_intra_bsize) {
    disable_reference(INTRA_FRAME, mask->ref_combo);
  }

  // Restrict intra luma modes to the mask allowed for this block's maximum
  // transform size.
  mask->pred_modes[INTRA_FRAME] |=
      ~(sf->intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]]);
}
3673 
init_neighbor_pred_buf(const OBMCBuffer * const obmc_buffer,HandleInterModeArgs * const args,int is_hbd)3674 static AOM_INLINE void init_neighbor_pred_buf(
3675     const OBMCBuffer *const obmc_buffer, HandleInterModeArgs *const args,
3676     int is_hbd) {
3677   if (is_hbd) {
3678     const int len = sizeof(uint16_t);
3679     args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred);
3680     args->above_pred_buf[1] = CONVERT_TO_BYTEPTR(obmc_buffer->above_pred +
3681                                                  (MAX_SB_SQUARE >> 1) * len);
3682     args->above_pred_buf[2] =
3683         CONVERT_TO_BYTEPTR(obmc_buffer->above_pred + MAX_SB_SQUARE * len);
3684     args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(obmc_buffer->left_pred);
3685     args->left_pred_buf[1] =
3686         CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1) * len);
3687     args->left_pred_buf[2] =
3688         CONVERT_TO_BYTEPTR(obmc_buffer->left_pred + MAX_SB_SQUARE * len);
3689   } else {
3690     args->above_pred_buf[0] = obmc_buffer->above_pred;
3691     args->above_pred_buf[1] = obmc_buffer->above_pred + (MAX_SB_SQUARE >> 1);
3692     args->above_pred_buf[2] = obmc_buffer->above_pred + MAX_SB_SQUARE;
3693     args->left_pred_buf[0] = obmc_buffer->left_pred;
3694     args->left_pred_buf[1] = obmc_buffer->left_pred + (MAX_SB_SQUARE >> 1);
3695     args->left_pred_buf[2] = obmc_buffer->left_pred + MAX_SB_SQUARE;
3696   }
3697 }
3698 
prune_ref_frame(const AV1_COMP * cpi,const MACROBLOCK * x,MV_REFERENCE_FRAME ref_frame)3699 static AOM_INLINE int prune_ref_frame(const AV1_COMP *cpi, const MACROBLOCK *x,
3700                                       MV_REFERENCE_FRAME ref_frame) {
3701   const AV1_COMMON *const cm = &cpi->common;
3702   MV_REFERENCE_FRAME rf[2];
3703   av1_set_ref_frame(rf, ref_frame);
3704 
3705   if ((cpi->prune_ref_frame_mask >> ref_frame) & 1) return 1;
3706 
3707   if (prune_ref_by_selective_ref_frame(cpi, x, rf,
3708                                        cm->cur_frame->ref_display_order_hint)) {
3709     return 1;
3710   }
3711 
3712   return 0;
3713 }
3714 
is_ref_frame_used_by_compound_ref(int ref_frame,int skip_ref_frame_mask)3715 static AOM_INLINE int is_ref_frame_used_by_compound_ref(
3716     int ref_frame, int skip_ref_frame_mask) {
3717   for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) {
3718     if (!(skip_ref_frame_mask & (1 << r))) {
3719       const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES];
3720       if (rf[0] == ref_frame || rf[1] == ref_frame) {
3721         return 1;
3722       }
3723     }
3724   }
3725   return 0;
3726 }
3727 
is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,const MB_MODE_INFO * mi_cache)3728 static AOM_INLINE int is_ref_frame_used_in_cache(MV_REFERENCE_FRAME ref_frame,
3729                                                  const MB_MODE_INFO *mi_cache) {
3730   if (!mi_cache) {
3731     return 0;
3732   }
3733 
3734   if (ref_frame < REF_FRAMES) {
3735     return (ref_frame == mi_cache->ref_frame[0] ||
3736             ref_frame == mi_cache->ref_frame[1]);
3737   }
3738 
3739   // if we are here, then the current mode is compound.
3740   MV_REFERENCE_FRAME cached_ref_type = av1_ref_frame_type(mi_cache->ref_frame);
3741   return ref_frame == cached_ref_type;
3742 }
3743 
// Please add/modify parameter setting in this function, making it consistent
// and easy to read and maintain.
//
// One-time setup before the inter-mode RD search for a block: reference-cost
// estimation, per-reference MV-ref population, OBMC neighbor predictions and
// the mode-skip mask.
static AOM_INLINE void set_params_rd_pick_inter_mode(
    const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args,
    BLOCK_SIZE bsize, mode_skip_mask_t *mode_skip_mask, int skip_ref_frame_mask,
    unsigned int *ref_costs_single, unsigned int (*ref_costs_comp)[REF_FRAMES],
    struct buf_2d (*yv12_mb)[MAX_MB_PLANE]) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
  unsigned char segment_id = mbmi->segment_id;

  init_neighbor_pred_buf(&x->obmc_buffer, args, is_cur_buf_hbd(&x->e_mbd));
  av1_collect_neighbors_ref_counts(xd);
  estimate_ref_frame_costs(cm, xd, &x->mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;
  x->best_pred_mv_sad = INT_MAX;

  // Single-reference setup: MV refs and prediction buffers per reference.
  for (MV_REFERENCE_FRAME ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME;
       ++ref_frame) {
    x->pred_mv_sad[ref_frame] = INT_MAX;
    mbmi_ext->mode_context[ref_frame] = 0;
    mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
    if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
      // Skip the ref frame if the mask says skip and the ref is not used by
      // compound ref.
      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
      setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, yv12_mb);
    }
    // Store the best pred_mv_sad across all past frames
    if (cpi->sf.inter_sf.alt_ref_search_fp &&
        cpi->ref_frame_dist_info.ref_relative_dist[ref_frame - LAST_FRAME] < 0)
      x->best_pred_mv_sad =
          AOMMIN(x->best_pred_mv_sad, x->pred_mv_sad[ref_frame]);
  }

  if (!cpi->sf.rt_sf.use_real_time_ref_set && is_comp_ref_allowed(bsize)) {
    // No second reference on RT ref set, so no need to initialize
    for (MV_REFERENCE_FRAME ref_frame = EXTREF_FRAME;
         ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) {
      mbmi_ext->mode_context[ref_frame] = 0;
      mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX;
      const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES];
      // Both references of the pair must be available.
      if (!((cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[0]]) &&
            (cpi->ref_frame_flags & av1_ref_frame_flag_list[rf[1]]))) {
        continue;
      }

      if (skip_ref_frame_mask & (1 << ref_frame) &&
          !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
        continue;
      }
      // Ref mv list population is not required, when compound references are
      // pruned.
      if (prune_ref_frame(cpi, x, ref_frame)) continue;

      av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
                       xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
                       mbmi_ext->mode_context);
      // TODO(Ravi): Populate mbmi_ext->ref_mv_stack[ref_frame][4] and
      // mbmi_ext->weight[ref_frame][4] inside av1_find_mv_refs.
      av1_copy_usable_ref_mv_stack_and_weight(xd, mbmi_ext, ref_frame);
    }
  }

  av1_count_overlappable_neighbors(cm, xd);
  const FRAME_UPDATE_TYPE update_type =
      get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
  // OBMC is pruned when its estimated probability falls below the threshold.
  const int prune_obmc = cpi->ppi->frame_probs.obmc_probs[update_type][bsize] <
                         cpi->sf.inter_sf.prune_obmc_prob_thresh;
  if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
    if (check_num_overlappable_neighbors(mbmi) &&
        is_motion_variation_allowed_bsize(bsize)) {
      // Build the above/left neighbor predictions used by OBMC and compute
      // the target weighted prediction once, up front.
      int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                       MAX_SB_SIZE >> 1 };
      int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                        MAX_SB_SIZE >> 1 };
      int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE };
      av1_build_prediction_by_above_preds(cm, xd, args->above_pred_buf,
                                          dst_width1, dst_height1,
                                          args->above_pred_stride);
      av1_build_prediction_by_left_preds(cm, xd, args->left_pred_buf,
                                         dst_width2, dst_height2,
                                         args->left_pred_stride);
      const int num_planes = av1_num_planes(cm);
      av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row,
                           mi_col, 0, num_planes);
      calc_target_weighted_pred(
          cm, x, xd, args->above_pred_buf[0], args->above_pred_stride[0],
          args->left_pred_buf[0], args->left_pred_stride[0]);
    }
  }

  init_mode_skip_mask(mode_skip_mask, cpi, x, bsize);

  // Set params for mode evaluation
  set_mode_eval_params(cpi, x, MODE_EVAL);

  x->comp_rd_stats_idx = 0;

  for (int idx = 0; idx < REF_FRAMES; idx++) {
    args->best_single_sse_in_refs[idx] = INT32_MAX;
  }
}
3858 
// Reset 'search_state' to its pristine pre-search values: invalid best mode,
// maxed-out RD costs, zeroed per-mode bookkeeping and precomputed per-mode
// RD thresholds.
static AOM_INLINE void init_inter_mode_search_state(
    InterModeSearchState *search_state, const AV1_COMP *cpi,
    const MACROBLOCK *x, BLOCK_SIZE bsize, int64_t best_rd_so_far) {
  init_intra_mode_search_state(&search_state->intra_search_state);
  av1_invalid_rd_stats(&search_state->best_y_rdcost);

  search_state->best_rd = best_rd_so_far;
  search_state->best_skip_rd[0] = INT64_MAX;
  search_state->best_skip_rd[1] = INT64_MAX;

  av1_zero(search_state->best_mbmode);

  search_state->best_rate_y = INT_MAX;

  search_state->best_rate_uv = INT_MAX;

  search_state->best_mode_skippable = 0;

  search_state->best_skip2 = 0;

  search_state->best_mode_index = THR_INVALID;

  const MACROBLOCKD *const xd = &x->e_mbd;
  const MB_MODE_INFO *const mbmi = xd->mi[0];
  const unsigned char segment_id = mbmi->segment_id;

  search_state->num_available_refs = 0;
  memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs));
  memset(search_state->dist_order_refs, -1,
         sizeof(search_state->dist_order_refs));

  // NEWMV modes (up to LAST_NEW_MV_INDEX) are never thresholded; the rest
  // get a per-segment, per-bsize threshold scaled by its frequency factor.
  for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i)
    search_state->mode_threshold[i] = 0;
  const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize];
  for (int i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i)
    search_state->mode_threshold[i] =
        ((int64_t)rd_threshes[i] * x->thresh_freq_fact[bsize][i]) >>
        RD_THRESH_FAC_FRAC_BITS;

  search_state->best_intra_rd = INT64_MAX;

  search_state->best_pred_sse = UINT_MAX;

  av1_zero(search_state->single_newmv);
  av1_zero(search_state->single_newmv_rate);
  av1_zero(search_state->single_newmv_valid);
  for (int i = 0; i < MB_MODE_COUNT; ++i) {
    for (int j = 0; j < MAX_REF_MV_SEARCH; ++j) {
      for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
        search_state->modelled_rd[i][j][ref_frame] = INT64_MAX;
        search_state->simple_rd[i][j][ref_frame] = INT64_MAX;
      }
    }
  }

  // Per-direction single-mode state used by mode-pruning heuristics.
  for (int dir = 0; dir < 2; ++dir) {
    for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
        SingleInterModeState *state;

        state = &search_state->single_state[dir][mode][ref_frame];
        state->ref_frame = NONE_FRAME;
        state->rd = INT64_MAX;

        state = &search_state->single_state_modelled[dir][mode][ref_frame];
        state->ref_frame = NONE_FRAME;
        state->rd = INT64_MAX;
      }
    }
  }
  for (int dir = 0; dir < 2; ++dir) {
    for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) {
        search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME;
      }
    }
  }
  for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) {
    search_state->best_single_rd[ref_frame] = INT64_MAX;
    search_state->best_single_mode[ref_frame] = MB_MODE_COUNT;
  }
  av1_zero(search_state->single_state_cnt);
  av1_zero(search_state->single_state_modelled_cnt);

  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] = INT64_MAX;
  }
}
3947 
mask_says_skip(const mode_skip_mask_t * mode_skip_mask,const MV_REFERENCE_FRAME * ref_frame,const PREDICTION_MODE this_mode)3948 static bool mask_says_skip(const mode_skip_mask_t *mode_skip_mask,
3949                            const MV_REFERENCE_FRAME *ref_frame,
3950                            const PREDICTION_MODE this_mode) {
3951   if (mode_skip_mask->pred_modes[ref_frame[0]] & (1 << this_mode)) {
3952     return true;
3953   }
3954 
3955   return mode_skip_mask->ref_combo[ref_frame[0]][ref_frame[1] + 1];
3956 }
3957 
// Returns 1 when (curr_mode, ref_frames) is structurally incompatible with
// the current frame/block (compound not allowed, missing reference,
// inter-intra not supported, ...), 0 when the combination may be searched.
static int inter_mode_compatible_skip(const AV1_COMP *cpi, const MACROBLOCK *x,
                                      BLOCK_SIZE bsize,
                                      PREDICTION_MODE curr_mode,
                                      const MV_REFERENCE_FRAME *ref_frames) {
  const AV1_COMMON *const cm = &cpi->common;
  if (ref_frames[1] > INTRA_FRAME) {
    // Compound prediction: verify compound references are usable at all.
    if (!is_comp_ref_allowed(bsize)) return 1;
    if (!(cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frames[1]]))
      return 1;
    if (frame_is_intra_only(cm)) return 1;
    if (cm->current_frame.reference_mode == SINGLE_REFERENCE) return 1;

    // Do not allow compound prediction if the segment level reference frame
    // feature is in use as in this case there can only be one reference.
    const unsigned char segment_id = x->e_mbd.mi[0]->segment_id;
    if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) return 1;
  } else if (ref_frames[0] > INTRA_FRAME && ref_frames[1] == INTRA_FRAME) {
    // Inter-intra: both the block size and the mode must support it.
    if (!is_interintra_allowed_bsize(bsize) ||
        !is_interintra_allowed_mode(curr_mode)) {
      return 1;
    }
  }

  return 0;
}
3990 
// Union of the per-mi-unit picked-reference masks over this block's
// footprint inside its superblock.
static int fetch_picked_ref_frames_mask(const MACROBLOCK *const x,
                                        BLOCK_SIZE bsize, int mib_size) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const int sb_mask = mib_size - 1;
  // Position of this block inside its superblock, in mi units.
  const int row0 = xd->mi_row & sb_mask;
  const int col0 = xd->mi_col & sb_mask;
  const int rows = mi_size_high[bsize];
  const int cols = mi_size_wide[bsize];
  int mask = 0;
  // The per-SB mask grid is stored with a fixed stride of 32 mi units.
  for (int r = row0; r < row0 + rows; ++r) {
    for (int c = col0; c < col0 + cols; ++c) {
      mask |= x->picked_ref_frames_mask[r * 32 + c];
    }
  }
  return mask;
}
4009 
4010 // Check if reference frame pair of the current block matches with the given
4011 // block.
match_ref_frame_pair(const MB_MODE_INFO * mbmi,const MV_REFERENCE_FRAME * ref_frames)4012 static INLINE int match_ref_frame_pair(const MB_MODE_INFO *mbmi,
4013                                        const MV_REFERENCE_FRAME *ref_frames) {
4014   return ((ref_frames[0] == mbmi->ref_frame[0]) &&
4015           (ref_frames[1] == mbmi->ref_frame[1]));
4016 }
4017 
// Decides whether the given (mode, reference-frame) combination can be
// skipped, using only criteria that do not depend on the order in which
// modes are searched.
// Case 1: return 0, means don't skip this mode
// Case 2: return 1, means skip this mode completely
// Case 3: return 2, means skip compound only, but still try single motion modes
static int inter_mode_search_order_independent_skip(
    const AV1_COMP *cpi, const MACROBLOCK *x, mode_skip_mask_t *mode_skip_mask,
    InterModeSearchState *search_state, int skip_ref_frame_mask,
    PREDICTION_MODE mode, const MV_REFERENCE_FRAME *ref_frame) {
  // Hard prune: the (reference, mode) pair was masked out ahead of the loop.
  if (mask_says_skip(mode_skip_mask, ref_frame, mode)) {
    return 1;
  }

  const int ref_type = av1_ref_frame_type(ref_frame);
  if (prune_ref_frame(cpi, x, ref_type)) return 1;

  // This is only used in motion vector unit test.
  if (cpi->oxcf.unit_test_cfg.motion_vector_unit_test &&
      ref_frame[0] == INTRA_FRAME)
    return 1;

  const AV1_COMMON *const cm = &cpi->common;
  // Skip modes whose motion vector duplicates one already evaluated by an
  // earlier (mode, ref) combination.
  if (skip_repeated_mv(cm, x, mode, ref_frame, search_state)) {
    return 1;
  }

  // Reuse the prediction mode in cache
  if (x->use_mb_mode_cache) {
    const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
    const PREDICTION_MODE cached_mode = cached_mi->mode;
    const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
    const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;

    // If the cached mode is intra, then we just need to match the mode.
    if (is_mode_intra(cached_mode) && mode != cached_mode) {
      return 1;
    }

    // If the cached mode is single inter mode, then we match the mode and
    // reference frame.
    if (cached_mode_is_single) {
      if (mode != cached_mode || ref_frame[0] != cached_frame[0]) {
        return 1;
      }
    } else {
      // If the cached mode is compound, then we need to consider several cases.
      const int mode_is_single = ref_frame[1] <= INTRA_FRAME;
      if (mode_is_single) {
        // If the mode is single, we know the modes can't match. But we might
        // still want to search it if compound mode depends on the current mode.
        int skip_motion_mode_only = 0;
        if (cached_mode == NEW_NEARMV || cached_mode == NEW_NEARESTMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0]);
        } else if (cached_mode == NEAR_NEWMV || cached_mode == NEAREST_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[1]);
        } else if (cached_mode == NEW_NEWMV) {
          skip_motion_mode_only = (ref_frame[0] == cached_frame[0] ||
                                   ref_frame[0] == cached_frame[1]);
        }

        // 1 = skip completely; 2 = skip only the motion-mode search (see the
        // return-value contract in the header comment).
        return 1 + skip_motion_mode_only;
      } else {
        // If both modes are compound, then everything must match.
        if (mode != cached_mode || ref_frame[0] != cached_frame[0] ||
            ref_frame[1] != cached_frame[1]) {
          return 1;
        }
      }
    }
  }

  const MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];
  // If no valid mode has been found so far in PARTITION_NONE when finding a
  // valid partition is required, do not skip mode.
  if (search_state->best_rd == INT64_MAX && mbmi->partition == PARTITION_NONE &&
      x->must_find_valid_partition)
    return 0;

  const SPEED_FEATURES *const sf = &cpi->sf;
  // Prune NEARMV and NEAR_NEARMV based on q index and neighbor's reference
  // frames
  if (sf->inter_sf.prune_nearmv_using_neighbors &&
      (mode == NEAR_NEARMV || mode == NEARMV)) {
    const MACROBLOCKD *const xd = &x->e_mbd;
    if (search_state->best_rd != INT64_MAX && xd->left_available &&
        xd->up_available) {
      // Thresholds indexed by [pruning level - 1][qindex sub-range]; higher
      // levels / lower qindex ranges require more neighbor matches.
      const int thresholds[PRUNE_NEARMV_MAX][3] = { { 1, 0, 0 },
                                                    { 1, 1, 0 },
                                                    { 2, 1, 0 } };
      const int qindex_sub_range = x->qindex * 3 / QINDEX_RANGE;

      assert(sf->inter_sf.prune_nearmv_using_neighbors <= PRUNE_NEARMV_MAX &&
             qindex_sub_range < 3);
      const int num_ref_frame_pair_match_thresh =
          thresholds[sf->inter_sf.prune_nearmv_using_neighbors - 1]
                    [qindex_sub_range];

      assert(num_ref_frame_pair_match_thresh <= 2 &&
             num_ref_frame_pair_match_thresh >= 0);
      int num_ref_frame_pair_match = 0;

      num_ref_frame_pair_match = match_ref_frame_pair(xd->left_mbmi, ref_frame);
      num_ref_frame_pair_match +=
          match_ref_frame_pair(xd->above_mbmi, ref_frame);

      // Pruning based on ref frame pair match with neighbors.
      if (num_ref_frame_pair_match < num_ref_frame_pair_match_thresh) return 1;
    }
  }

  int skip_motion_mode = 0;
  if (mbmi->partition != PARTITION_NONE) {
    int skip_ref = skip_ref_frame_mask & (1 << ref_type);
    if (ref_type <= ALTREF_FRAME && skip_ref) {
      // Since the compound ref modes depends on the motion estimation result of
      // two single ref modes (best mv of single ref modes as the start point),
      // if current single ref mode is marked skip, we need to check if it will
      // be used in compound ref modes.
      if (is_ref_frame_used_by_compound_ref(ref_type, skip_ref_frame_mask)) {
        // Found a not skipped compound ref mode which contains current
        // single ref. So this single ref can't be skipped completely
        // Just skip its motion mode search, still try its simple
        // transition mode.
        skip_motion_mode = 1;
        skip_ref = 0;
      }
    }
    // If we are reusing the prediction from cache, and the current frame is
    // required by the cache, then we cannot prune it.
    if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
      skip_ref = 0;
      // If the cache only needs the current reference type for compound
      // prediction, then we can skip motion mode search.
      skip_motion_mode = (ref_type <= ALTREF_FRAME &&
                          x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
    }
    if (skip_ref) return 1;
  }

  if (ref_frame[0] == INTRA_FRAME) {
    if (mode != DC_PRED) {
      // Disable intra modes other than DC_PRED for blocks with low variance
      // Threshold for intra skipping based on source variance
      // TODO(debargha): Specialize the threshold for super block sizes
      const unsigned int skip_intra_var_thresh = 64;
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) &&
          x->source_variance < skip_intra_var_thresh)
        return 1;
    }
  }

  if (skip_motion_mode) return 2;

  return 0;
}
4171 
init_mbmi(MB_MODE_INFO * mbmi,PREDICTION_MODE curr_mode,const MV_REFERENCE_FRAME * ref_frames,const AV1_COMMON * cm)4172 static INLINE void init_mbmi(MB_MODE_INFO *mbmi, PREDICTION_MODE curr_mode,
4173                              const MV_REFERENCE_FRAME *ref_frames,
4174                              const AV1_COMMON *cm) {
4175   PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
4176   mbmi->ref_mv_idx = 0;
4177   mbmi->mode = curr_mode;
4178   mbmi->uv_mode = UV_DC_PRED;
4179   mbmi->ref_frame[0] = ref_frames[0];
4180   mbmi->ref_frame[1] = ref_frames[1];
4181   pmi->palette_size[0] = 0;
4182   pmi->palette_size[1] = 0;
4183   mbmi->filter_intra_mode_info.use_filter_intra = 0;
4184   mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0;
4185   mbmi->motion_mode = SIMPLE_TRANSLATION;
4186   mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1);
4187   set_default_interp_filters(mbmi, cm->features.interp_filter);
4188 }
4189 
collect_single_states(MACROBLOCK * x,InterModeSearchState * search_state,const MB_MODE_INFO * const mbmi)4190 static AOM_INLINE void collect_single_states(MACROBLOCK *x,
4191                                              InterModeSearchState *search_state,
4192                                              const MB_MODE_INFO *const mbmi) {
4193   int i, j;
4194   const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0];
4195   const PREDICTION_MODE this_mode = mbmi->mode;
4196   const int dir = ref_frame <= GOLDEN_FRAME ? 0 : 1;
4197   const int mode_offset = INTER_OFFSET(this_mode);
4198   const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode);
4199 
4200   // Simple rd
4201   int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame];
4202   for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4203     const int64_t rd =
4204         search_state->simple_rd[this_mode][ref_mv_idx][ref_frame];
4205     if (rd < simple_rd) simple_rd = rd;
4206   }
4207 
4208   // Insertion sort of single_state
4209   const SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 };
4210   SingleInterModeState *state_s = search_state->single_state[dir][mode_offset];
4211   i = search_state->single_state_cnt[dir][mode_offset];
4212   for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j)
4213     state_s[j] = state_s[j - 1];
4214   state_s[j] = this_state_s;
4215   search_state->single_state_cnt[dir][mode_offset]++;
4216 
4217   // Modelled rd
4218   int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame];
4219   for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) {
4220     const int64_t rd =
4221         search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame];
4222     if (rd < modelled_rd) modelled_rd = rd;
4223   }
4224 
4225   // Insertion sort of single_state_modelled
4226   const SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 };
4227   SingleInterModeState *state_m =
4228       search_state->single_state_modelled[dir][mode_offset];
4229   i = search_state->single_state_modelled_cnt[dir][mode_offset];
4230   for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j)
4231     state_m[j] = state_m[j - 1];
4232   state_m[j] = this_state_m;
4233   search_state->single_state_modelled_cnt[dir][mode_offset]++;
4234 }
4235 
// Post-process the single-reference statistics gathered by
// collect_single_states(): invalidate unlikely reference frames and build a
// merged candidate ordering used later to prune compound modes.
static AOM_INLINE void analyze_single_states(
    const AV1_COMP *cpi, InterModeSearchState *search_state) {
  const int prune_level = cpi->sf.inter_sf.prune_comp_search_by_single_result;
  assert(prune_level >= 1);
  int i, j, dir, mode;

  for (dir = 0; dir < 2; ++dir) {
    int64_t best_rd;
    SingleInterModeState(*state)[FWD_REFS];
    // Higher prune level uses a larger factor, i.e. more aggressive pruning.
    const int prune_factor = prune_level >= 2 ? 6 : 5;

    // Use the best rd of GLOBALMV or NEWMV to prune the unlikely
    // reference frames for all the modes (NEARESTMV and NEARMV may not
    // have same motion vectors). Always keep the best of each mode
    // because it might form the best possible combination with other mode.
    state = search_state->single_state[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      // i starts at 1 so the best entry of each mode is never invalidated.
      for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) {
        // Invalidate entries whose rd exceeds best_rd by more than a factor
        // of prune_factor/8.
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }

    // Same invalidation pass for the modelled-rd statistics.
    state = search_state->single_state_modelled[dir];
    best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd,
                     state[INTER_OFFSET(GLOBALMV)][0].rd);
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; ++i) {
        if (state[mode][i].rd != INT64_MAX &&
            (state[mode][i].rd >> 3) * prune_factor > best_rd) {
          state[mode][i].valid = 0;
        }
      }
    }
  }

  // Ordering by simple rd first, then by modelled rd
  for (dir = 0; dir < 2; ++dir) {
    for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) {
      const int state_cnt_s = search_state->single_state_cnt[dir][mode];
      const int state_cnt_m =
          search_state->single_state_modelled_cnt[dir][mode];
      SingleInterModeState *state_s = search_state->single_state[dir][mode];
      SingleInterModeState *state_m =
          search_state->single_state_modelled[dir][mode];
      int count = 0;
      const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m);
      // First take the valid entries in simple-rd order.
      for (i = 0; i < state_cnt_s; ++i) {
        if (state_s[i].rd == INT64_MAX) break;
        if (state_s[i].valid) {
          search_state->single_rd_order[dir][mode][count++] =
              state_s[i].ref_frame;
        }
      }
      if (count >= max_candidates) continue;

      // Then fill remaining slots from the modelled-rd order, skipping
      // duplicates and entries that were invalidated on the simple-rd side.
      for (i = 0; i < state_cnt_m && count < max_candidates; ++i) {
        if (state_m[i].rd == INT64_MAX) break;
        if (!state_m[i].valid) continue;
        const int ref_frame = state_m[i].ref_frame;
        int match = 0;
        // Check if existing already
        for (j = 0; j < count; ++j) {
          if (search_state->single_rd_order[dir][mode][j] == ref_frame) {
            match = 1;
            break;
          }
        }
        if (match) continue;
        // Check if this ref_frame is removed in simple rd
        int valid = 1;
        for (j = 0; j < state_cnt_s; ++j) {
          if (ref_frame == state_s[j].ref_frame) {
            valid = state_s[j].valid;
            break;
          }
        }
        if (valid) {
          search_state->single_rd_order[dir][mode][count++] = ref_frame;
        }
      }
    }
  }
}
4324 
compound_skip_get_candidates(const AV1_COMP * cpi,const InterModeSearchState * search_state,const int dir,const PREDICTION_MODE mode)4325 static int compound_skip_get_candidates(
4326     const AV1_COMP *cpi, const InterModeSearchState *search_state,
4327     const int dir, const PREDICTION_MODE mode) {
4328   const int mode_offset = INTER_OFFSET(mode);
4329   const SingleInterModeState *state =
4330       search_state->single_state[dir][mode_offset];
4331   const SingleInterModeState *state_modelled =
4332       search_state->single_state_modelled[dir][mode_offset];
4333 
4334   int max_candidates = 0;
4335   for (int i = 0; i < FWD_REFS; ++i) {
4336     if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break;
4337     max_candidates++;
4338   }
4339 
4340   int candidates = max_candidates;
4341   if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 2) {
4342     candidates = AOMMIN(2, max_candidates);
4343   }
4344   if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 3) {
4345     if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX &&
4346         state[0].ref_frame == state_modelled[0].ref_frame)
4347       candidates = 1;
4348     if (mode == NEARMV || mode == GLOBALMV) candidates = 1;
4349   }
4350 
4351   if (cpi->sf.inter_sf.prune_comp_search_by_single_result >= 4) {
4352     // Limit the number of candidates to 1 in each direction for compound
4353     // prediction
4354     candidates = AOMMIN(1, candidates);
4355   }
4356   return candidates;
4357 }
4358 
// Decide whether a compound mode can be skipped based on the RD results of
// the single-reference modes it is built from. Returns 1 to skip.
static int compound_skip_by_single_states(
    const AV1_COMP *cpi, const InterModeSearchState *search_state,
    const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame,
    const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) {
  const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame };
  // Per-side single modes that make up this compound mode.
  const int mode[2] = { compound_ref0_mode(this_mode),
                        compound_ref1_mode(this_mode) };
  const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) };
  const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1,
                            refs[1] <= GOLDEN_FRAME ? 0 : 1 };
  int ref_searched[2] = { 0, 0 };
  int ref_mv_match[2] = { 1, 1 };
  int i, j;

  // Determine whether each side's single mode was actually searched with
  // the corresponding reference frame.
  for (i = 0; i < 2; ++i) {
    const SingleInterModeState *state =
        search_state->single_state[mode_dir[i]][mode_offset[i]];
    const int state_cnt =
        search_state->single_state_cnt[mode_dir[i]][mode_offset[i]];
    for (j = 0; j < state_cnt; ++j) {
      if (state[j].ref_frame == refs[i]) {
        ref_searched[i] = 1;
        break;
      }
    }
  }

  // For NEARESTMV/NEARMV sides, the single-mode result is only comparable
  // to the compound mode if their candidate MVs agree for every ref-mv
  // index; otherwise the side is excluded from pruning below.
  const int ref_set = get_drl_refmv_count(x, refs, this_mode);
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || (mode[i] != NEARESTMV && mode[i] != NEARMV)) {
      continue;
    }
    const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME };
    for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) {
      int_mv single_mv;
      int_mv comp_mv;
      get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, 0, single_refs,
                  &x->mbmi_ext);
      get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, 0, refs, &x->mbmi_ext);
      if (single_mv.as_int != comp_mv.as_int) {
        ref_mv_match[i] = 0;
        break;
      }
    }
  }

  // Skip the compound mode if either side's reference frame is not among
  // the top single-mode candidates for that side.
  for (i = 0; i < 2; ++i) {
    if (!ref_searched[i] || !ref_mv_match[i]) continue;
    const int candidates =
        compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]);
    const MV_REFERENCE_FRAME *ref_order =
        search_state->single_rd_order[mode_dir[i]][mode_offset[i]];
    int match = 0;
    for (j = 0; j < candidates; ++j) {
      if (refs[i] == ref_order[j]) {
        match = 1;
        break;
      }
    }
    if (!match) return 1;
  }

  return 0;
}
4423 
4424 // Check if ref frames of current block matches with given block.
match_ref_frame(const MB_MODE_INFO * const mbmi,const MV_REFERENCE_FRAME * ref_frames,int * const is_ref_match)4425 static INLINE void match_ref_frame(const MB_MODE_INFO *const mbmi,
4426                                    const MV_REFERENCE_FRAME *ref_frames,
4427                                    int *const is_ref_match) {
4428   if (is_inter_block(mbmi)) {
4429     is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[0];
4430     is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[0];
4431     if (has_second_ref(mbmi)) {
4432       is_ref_match[0] |= ref_frames[0] == mbmi->ref_frame[1];
4433       is_ref_match[1] |= ref_frames[1] == mbmi->ref_frame[1];
4434     }
4435   }
4436 }
4437 
4438 // Prune compound mode using ref frames of neighbor blocks.
compound_skip_using_neighbor_refs(MACROBLOCKD * const xd,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,int prune_ext_comp_using_neighbors)4439 static INLINE int compound_skip_using_neighbor_refs(
4440     MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
4441     const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) {
4442   // Exclude non-extended compound modes from pruning
4443   if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4444       this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4445     return 0;
4446 
4447   if (prune_ext_comp_using_neighbors >= 3) return 1;
4448 
4449   int is_ref_match[2] = { 0 };  // 0 - match for forward refs
4450                                 // 1 - match for backward refs
4451   // Check if ref frames of this block matches with left neighbor.
4452   if (xd->left_available)
4453     match_ref_frame(xd->left_mbmi, ref_frames, is_ref_match);
4454 
4455   // Check if ref frames of this block matches with above neighbor.
4456   if (xd->up_available)
4457     match_ref_frame(xd->above_mbmi, ref_frames, is_ref_match);
4458 
4459   // Combine ref frame match with neighbors in forward and backward refs.
4460   const int track_ref_match = is_ref_match[0] + is_ref_match[1];
4461 
4462   // Pruning based on ref frame match with neighbors.
4463   if (track_ref_match >= prune_ext_comp_using_neighbors) return 0;
4464   return 1;
4465 }
4466 
4467 // Update best single mode for the given reference frame based on simple rd.
update_best_single_mode(InterModeSearchState * search_state,const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME ref_frame,int64_t this_rd)4468 static INLINE void update_best_single_mode(InterModeSearchState *search_state,
4469                                            const PREDICTION_MODE this_mode,
4470                                            const MV_REFERENCE_FRAME ref_frame,
4471                                            int64_t this_rd) {
4472   if (this_rd < search_state->best_single_rd[ref_frame]) {
4473     search_state->best_single_rd[ref_frame] = this_rd;
4474     search_state->best_single_mode[ref_frame] = this_mode;
4475   }
4476 }
4477 
4478 // Prune compound mode using best single mode for the same reference.
skip_compound_using_best_single_mode_ref(const PREDICTION_MODE this_mode,const MV_REFERENCE_FRAME * ref_frames,const PREDICTION_MODE * best_single_mode,int prune_comp_using_best_single_mode_ref)4479 static INLINE int skip_compound_using_best_single_mode_ref(
4480     const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME *ref_frames,
4481     const PREDICTION_MODE *best_single_mode,
4482     int prune_comp_using_best_single_mode_ref) {
4483   // Exclude non-extended compound modes from pruning
4484   if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
4485       this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
4486     return 0;
4487 
4488   assert(this_mode >= NEAREST_NEWMV && this_mode <= NEW_NEARMV);
4489   const PREDICTION_MODE comp_mode_ref0 = compound_ref0_mode(this_mode);
4490   // Get ref frame direction corresponding to NEWMV
4491   // 0 - NEWMV corresponding to forward direction
4492   // 1 - NEWMV corresponding to backward direction
4493   const int newmv_dir = comp_mode_ref0 != NEWMV;
4494 
4495   // Avoid pruning the compound mode when ref frame corresponding to NEWMV
4496   // have NEWMV as single mode winner.
4497   // Example: For an extended-compound mode,
4498   // {mode, {fwd_frame, bwd_frame}} = {NEAR_NEWMV, {LAST_FRAME, ALTREF_FRAME}}
4499   // - Ref frame corresponding to NEWMV is ALTREF_FRAME
4500   // - Avoid pruning this mode, if best single mode corresponding to ref frame
4501   //   ALTREF_FRAME is NEWMV
4502   const PREDICTION_MODE single_mode = best_single_mode[ref_frames[newmv_dir]];
4503   if (single_mode == NEWMV) return 0;
4504 
4505   // Avoid pruning the compound mode when best single mode is not available
4506   if (prune_comp_using_best_single_mode_ref == 1)
4507     if (single_mode == MB_MODE_COUNT) return 0;
4508   return 1;
4509 }
4510 
// qsort() comparator for int64_t values, ascending order.
// Uses the (a > b) - (a < b) idiom rather than subtraction, which could
// overflow for widely separated values. The original implementation cast
// away const from the qsort arguments; read through const pointers instead.
static int compare_int64(const void *a, const void *b) {
  const int64_t a64 = *(const int64_t *)a;
  const int64_t b64 = *(const int64_t *)b;
  return (a64 > b64) - (a64 < b64);
}
4522 
// Record a newly found best mode: copy its rd stats, mode info and
// transform bookkeeping into search_state, best_rd_stats_dst and ctx.
static INLINE void update_search_state(
    InterModeSearchState *search_state, RD_STATS *best_rd_stats_dst,
    PICK_MODE_CONTEXT *ctx, const RD_STATS *new_best_rd_stats,
    const RD_STATS *new_best_rd_stats_y, const RD_STATS *new_best_rd_stats_uv,
    THR_MODES new_best_mode, const MACROBLOCK *x, int txfm_search_done) {
  const MACROBLOCKD *xd = &x->e_mbd;
  const MB_MODE_INFO *mbmi = xd->mi[0];
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  // Transform skip is only meaningful for inter modes.
  const int skip_txfm =
      mbmi->skip_txfm && !is_mode_intra(av1_mode_defs[new_best_mode].mode);
  const TxfmSearchInfo *txfm_info = &x->txfm_search_info;

  search_state->best_rd = new_best_rd_stats->rdcost;
  search_state->best_mode_index = new_best_mode;
  *best_rd_stats_dst = *new_best_rd_stats;
  search_state->best_mbmode = *mbmi;
  search_state->best_skip2 = skip_txfm;
  search_state->best_mode_skippable = new_best_rd_stats->skip_txfm;
  // When !txfm_search_done, new_best_rd_stats won't provide correct rate_y and
  // rate_uv because av1_txfm_search process is replaced by rd estimation.
  // Therefore, we should avoid updating best_rate_y and best_rate_uv here.
  // These two values will be updated when av1_txfm_search is called.
  if (txfm_search_done) {
    // Fold the skip-txfm signalling cost into the luma rate.
    search_state->best_rate_y =
        new_best_rd_stats_y->rate +
        x->mode_costs.skip_txfm_cost[skip_ctx]
                                    [new_best_rd_stats->skip_txfm || skip_txfm];
    search_state->best_rate_uv = new_best_rd_stats_uv->rate;
  }
  search_state->best_y_rdcost = *new_best_rd_stats_y;
  // Preserve the per-4x4 block skip flags and tx types for the winner.
  memcpy(ctx->blk_skip, txfm_info->blk_skip,
         sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
  av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
}
4557 
4558 // Find the best RD for a reference frame (among single reference modes)
4559 // and store +10% of it in the 0-th element in ref_frame_rd.
find_top_ref(int64_t ref_frame_rd[REF_FRAMES])4560 static AOM_INLINE void find_top_ref(int64_t ref_frame_rd[REF_FRAMES]) {
4561   assert(ref_frame_rd[0] == INT64_MAX);
4562   int64_t ref_copy[REF_FRAMES - 1];
4563   memcpy(ref_copy, ref_frame_rd + 1,
4564          sizeof(ref_frame_rd[0]) * (REF_FRAMES - 1));
4565   qsort(ref_copy, REF_FRAMES - 1, sizeof(int64_t), compare_int64);
4566 
4567   int64_t cutoff = ref_copy[0];
4568   // The cut-off is within 10% of the best.
4569   if (cutoff != INT64_MAX) {
4570     assert(cutoff < INT64_MAX / 200);
4571     cutoff = (110 * cutoff) / 100;
4572   }
4573   ref_frame_rd[0] = cutoff;
4574 }
4575 
4576 // Check if either frame is within the cutoff.
in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],MV_REFERENCE_FRAME frame1,MV_REFERENCE_FRAME frame2)4577 static INLINE bool in_single_ref_cutoff(int64_t ref_frame_rd[REF_FRAMES],
4578                                         MV_REFERENCE_FRAME frame1,
4579                                         MV_REFERENCE_FRAME frame2) {
4580   assert(frame2 > 0);
4581   return ref_frame_rd[frame1] <= ref_frame_rd[0] ||
4582          ref_frame_rd[frame2] <= ref_frame_rd[0];
4583 }
4584 
// Re-run the motion mode search (via motion_mode_rd()) for each stored
// winner candidate (single-reference modes only) and update the overall
// search state if any candidate beats the current best rd.
static AOM_INLINE void evaluate_motion_mode_for_winner_candidates(
    const AV1_COMP *const cpi, MACROBLOCK *const x, RD_STATS *const rd_cost,
    HandleInterModeArgs *const args, TileDataEnc *const tile_data,
    PICK_MODE_CONTEXT *const ctx,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE],
    const motion_mode_best_st_candidate *const best_motion_mode_cands,
    int do_tx_search, const BLOCK_SIZE bsize, int64_t *const best_est_rd,
    InterModeSearchState *const search_state, int64_t *yrd) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *const inter_modes_info = x->inter_modes_info;
  const int num_best_cand = best_motion_mode_cands->num_motion_mode_cand;

  for (int cand = 0; cand < num_best_cand; cand++) {
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    av1_init_rd_stats(&rd_stats);
    av1_init_rd_stats(&rd_stats_y);
    av1_init_rd_stats(&rd_stats_uv);
    int rate_mv;

    // Restore the candidate's mode info and rates saved during the main
    // mode loop.
    rate_mv = best_motion_mode_cands->motion_mode_cand[cand].rate_mv;
    args->skip_motion_mode =
        best_motion_mode_cands->motion_mode_cand[cand].skip_motion_mode;
    *mbmi = best_motion_mode_cands->motion_mode_cand[cand].mbmi;
    rd_stats.rate =
        best_motion_mode_cands->motion_mode_cand[cand].rate2_nocoeff;

    // Continue if the best candidate is compound.
    if (!is_inter_singleref_mode(mbmi->mode)) continue;

    x->txfm_search_info.skip_txfm = 0;
    struct macroblockd_plane *pd = xd->plane;
    // Snapshot the current destination buffers for motion_mode_rd().
    const BUFFER_SET orig_dst = {
      { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
      { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
    };

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    // Initialize motion mode to simple translation
    // Calculation of switchable rate depends on it.
    mbmi->motion_mode = 0;
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    int64_t skip_rd[2] = { search_state->best_skip_rd[0],
                           search_state->best_skip_rd[1] };
    int64_t this_yrd = INT64_MAX;
    int64_t ret_value = motion_mode_rd(
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, args,
        search_state->best_rd, skip_rd, &rate_mv, &orig_dst, best_est_rd,
        do_tx_search, inter_modes_info, 1, &this_yrd);

    // INT64_MAX from motion_mode_rd() signals that the candidate was not
    // evaluated successfully; skip the bookkeeping in that case.
    if (ret_value != INT64_MAX) {
      rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
      const THR_MODES mode_enum = get_prediction_mode_idx(
          mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
      // Collect mode stats for multiwinner mode processing
      store_winner_mode_stats(
          &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv,
          mode_enum, NULL, bsize, rd_stats.rdcost,
          cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
      if (rd_stats.rdcost < search_state->best_rd) {
        *yrd = this_yrd;
        update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                            &rd_stats_uv, mode_enum, x, do_tx_search);
        if (do_tx_search) search_state->best_skip_rd[0] = skip_rd[0];
      }
    }
  }
}
4662 
/*!\cond */
// Arguments for speed feature pruning of inter mode search
typedef struct {
  // Output: set when only the motion-mode search should be skipped for the
  // current mode (return value 2 of
  // inter_mode_search_order_independent_skip()).
  int *skip_motion_mode;
  // Modes/reference pairs masked out ahead of the mode loop.
  mode_skip_mask_t *mode_skip_mask;
  // Shared inter mode search state (best rd so far, single-ref statistics,
  // per-mode thresholds).
  InterModeSearchState *search_state;
  // Bitmask of reference frame types to skip for this partition.
  int skip_ref_frame_mask;
  // Becomes 1 once the first compound mode is reached in the mode loop;
  // single-ref statistics are analyzed at that point.
  int reach_first_comp_mode;
  // Multiplier (Q of MODE_THRESH_QBITS) applied to the mode threshold when
  // the best mode so far is skippable, to prune more aggressively.
  int mode_thresh_mul_fact;
  // Count of single reference modes processed so far.
  // NOTE(review): not referenced in this portion of the file - confirm
  // intended usage at the consumer.
  int num_single_modes_processed;
  // Flag checked before pruning compound modes via single-ref stats
  // (sf.inter_sf.prune_compound_using_single_ref); presumably set once the
  // required single-ref results are available - confirm at the consumer.
  int prune_cpd_using_sr_stats_ready;
} InterModeSFArgs;
/*!\endcond */
4676 
// Applies the speed-feature based pruning checks to the inter mode given by
// midx. Returns 1 if the mode should be skipped entirely, 0 otherwise. As a
// side effect, *args->skip_motion_mode is set when only the motion mode
// search should be skipped, and the pruning bookkeeping in args is updated.
static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
                           int64_t *ref_frame_rd, int midx,
                           InterModeSFArgs *args) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  // Get the actual prediction mode we are trying in this iteration
  const THR_MODES mode_enum = av1_default_mode_order[midx];
  const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
  const PREDICTION_MODE this_mode = mode_def->mode;
  const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;
  const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
  const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
  const int comp_pred = second_ref_frame > INTRA_FRAME;

  // Intra modes are handled by a separate search; skip them here.
  if (ref_frame == INTRA_FRAME) return 1;

  // Check if this mode should be skipped because it is incompatible with the
  // current frame
  if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
    return 1;
  // ret == 1: skip the mode entirely; ret == 2: keep the mode but skip its
  // motion mode search.
  const int ret = inter_mode_search_order_independent_skip(
      cpi, x, args->mode_skip_mask, args->search_state,
      args->skip_ref_frame_mask, this_mode, mode_def->ref_frame);
  if (ret == 1) return 1;
  *(args->skip_motion_mode) = (ret == 2);

  // We've reached the first compound prediction mode, get stats from the
  // single reference predictors to help with pruning
  if (sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred &&
      args->reach_first_comp_mode == 0) {
    analyze_single_states(cpi, args->search_state);
    args->reach_first_comp_mode = 1;
  }

  // Prune aggressively when best mode is skippable.
  // mul_fact is in Q(MODE_THRESH_QBITS) fixed point.
  int mul_fact = args->search_state->best_mode_skippable
                     ? args->mode_thresh_mul_fact
                     : (1 << MODE_THRESH_QBITS);
  int64_t mode_threshold =
      (args->search_state->mode_threshold[mode_enum] * mul_fact) >>
      MODE_THRESH_QBITS;

  // If the best RD so far already beats this mode's threshold, the mode is
  // unlikely to win; skip it.
  if (args->search_state->best_rd < mode_threshold) return 1;

  // Skip this compound mode based on the RD results from the single prediction
  // modes
  if (sf->inter_sf.prune_comp_search_by_single_result > 0 && comp_pred) {
    if (compound_skip_by_single_states(cpi, args->search_state, this_mode,
                                       ref_frame, second_ref_frame, x))
      return 1;
  }

  if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
    // After we done with single reference modes, find the 2nd best RD
    // for a reference frame. Only search compound modes that have a reference
    // frame at least as good as the 2nd best.
    if (!args->prune_cpd_using_sr_stats_ready &&
        args->num_single_modes_processed == NUM_SINGLE_REF_MODES) {
      find_top_ref(ref_frame_rd);
      args->prune_cpd_using_sr_stats_ready = 1;
    }
    if (args->prune_cpd_using_sr_stats_ready &&
        !in_single_ref_cutoff(ref_frame_rd, ref_frame, second_ref_frame))
      return 1;
  }

  // Skip NEW_NEARMV and NEAR_NEWMV extended compound modes
  if (sf->inter_sf.skip_ext_comp_nearmv_mode &&
      (this_mode == NEW_NEARMV || this_mode == NEAR_NEWMV)) {
    return 1;
  }

  // Prune compound modes whose references disagree with the neighbors'.
  if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
    if (compound_skip_using_neighbor_refs(
            xd, this_mode, ref_frames,
            sf->inter_sf.prune_ext_comp_using_neighbors))
      return 1;
  }

  // Prune compound modes that do not use the best single prediction mode's
  // reference frame.
  if (sf->inter_sf.prune_comp_using_best_single_mode_ref && comp_pred) {
    if (skip_compound_using_best_single_mode_ref(
            this_mode, ref_frames, args->search_state->best_single_mode,
            sf->inter_sf.prune_comp_using_best_single_mode_ref))
      return 1;
  }

  // Prune NEARESTMV/NEARMV single-reference modes based on the weights of the
  // candidates in the reference MV stack.
  if (sf->inter_sf.prune_nearest_near_mv_using_refmv_weight && !comp_pred) {
    const int8_t ref_frame_type = av1_ref_frame_type(ref_frames);
    if (skip_nearest_near_mv_using_refmv_weight(x, this_mode, ref_frame_type))
      return 1;
  }

  return 0;
}
4771 
// Updates the per-reference-mode best RD trackers in search_state with the RD
// of the current prediction. The rate with and without the compound-mode flag
// is derived from rd_stats depending on whether the frame-level reference
// mode already includes the flag cost (REFERENCE_MODE_SELECT) or not.
static void record_best_compound(REFERENCE_MODE reference_mode,
                                 RD_STATS *rd_stats, int comp_pred, int rdmult,
                                 InterModeSearchState *search_state,
                                 int compmode_cost) {
  int64_t rate_no_flag, rate_with_flag;

  if (reference_mode == REFERENCE_MODE_SELECT) {
    // rd_stats->rate already contains the compound-mode signaling cost.
    rate_no_flag = rd_stats->rate - compmode_cost;
    rate_with_flag = rd_stats->rate;
  } else {
    rate_no_flag = rd_stats->rate;
    rate_with_flag = rd_stats->rate + compmode_cost;
  }

  const int64_t rd_no_flag = RDCOST(rdmult, rate_no_flag, rd_stats->dist);
  const int64_t rd_with_flag = RDCOST(rdmult, rate_with_flag, rd_stats->dist);

  // Track the best RD separately for single and compound prediction, and for
  // the case where the reference mode is explicitly signaled.
  const int pred_type = comp_pred ? COMPOUND_REFERENCE : SINGLE_REFERENCE;
  if (rd_no_flag < search_state->best_pred_rd[pred_type])
    search_state->best_pred_rd[pred_type] = rd_no_flag;
  if (rd_with_flag < search_state->best_pred_rd[REFERENCE_MODE_SELECT])
    search_state->best_pred_rd[REFERENCE_MODE_SELECT] = rd_with_flag;
}
4799 
// Does a transform search over a list of the best inter mode candidates.
// This is called if the original mode search computed an RD estimate
// for the transform search rather than doing a full search.
// On return, search_state / rd_cost / ctx reflect the best candidate found
// and *yrd holds the luma-only RD cost of the best candidate.
static void tx_search_best_inter_candidates(
    AV1_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x,
    int64_t best_rd_so_far, BLOCK_SIZE bsize,
    struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int mi_row, int mi_col,
    InterModeSearchState *search_state, RD_STATS *rd_cost,
    PICK_MODE_CONTEXT *ctx, int64_t *yrd) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  const ModeCosts *mode_costs = &x->mode_costs;
  const int num_planes = av1_num_planes(cm);
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  MB_MODE_INFO *const mbmi = xd->mi[0];
  InterModesInfo *inter_modes_info = x->inter_modes_info;
  // Sort candidates in ascending order of their estimated RD.
  inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr);
  search_state->best_rd = best_rd_so_far;
  search_state->best_mode_index = THR_INVALID;
  // Initialize best mode stats for winner mode processing
  x->winner_mode_count = 0;
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
                          NULL, bsize, best_rd_so_far,
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);
  // Cap the candidate count per the real-time speed feature.
  inter_modes_info->num =
      inter_modes_info->num < cpi->sf.rt_sf.num_inter_modes_for_tx_search
          ? inter_modes_info->num
          : cpi->sf.rt_sf.num_inter_modes_for_tx_search;
  // Estimated RD of the best-ranked candidate; used for early termination.
  const int64_t top_est_rd =
      inter_modes_info->num > 0
          ? inter_modes_info
                ->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]
          : INT64_MAX;
  *yrd = INT64_MAX;
  int64_t best_rd_in_this_partition = INT64_MAX;
  int num_inter_mode_cands = inter_modes_info->num;
  int newmv_mode_evaled = 0;
  int max_allowed_cands = INT_MAX;
  if (cpi->sf.inter_sf.limit_inter_mode_cands) {
    // The bound on the no. of inter mode candidates, beyond which the
    // candidates are limited if a newmv mode got evaluated, is set as
    // max_allowed_cands + 1.
    const int num_allowed_cands[4] = { INT_MAX, 10, 9, 6 };
    assert(cpi->sf.inter_sf.limit_inter_mode_cands <= 3);
    max_allowed_cands =
        num_allowed_cands[cpi->sf.inter_sf.limit_inter_mode_cands];
  }

  int num_mode_thresh = INT_MAX;
  if (cpi->sf.inter_sf.limit_txfm_eval_per_mode) {
    // Bound the no. of transform searches per prediction mode beyond a
    // threshold.
    const int num_mode_thresh_ary[3] = { INT_MAX, 4, 3 };
    assert(cpi->sf.inter_sf.limit_txfm_eval_per_mode <= 2);
    num_mode_thresh =
        num_mode_thresh_ary[cpi->sf.inter_sf.limit_txfm_eval_per_mode];
  }

  int num_tx_cands = 0;
  // Per-prediction-mode count of transform searches performed so far.
  int num_tx_search_modes[INTER_MODE_END - INTER_MODE_START] = { 0 };
  // Iterate over best inter mode candidates and perform tx search
  for (int j = 0; j < num_inter_mode_cands; ++j) {
    const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
    *mbmi = inter_modes_info->mbmi_arr[data_idx];
    int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
    // The list is sorted, so once a candidate's estimated RD is clearly worse
    // than the best estimate, the remaining candidates are worse too.
    if (curr_est_rd * 0.80 > top_est_rd) break;

    // Beyond num_mode_thresh transform searches, allow at most one search
    // per prediction mode (two for NEARESTMV).
    if (num_tx_cands > num_mode_thresh) {
      if ((mbmi->mode != NEARESTMV &&
           num_tx_search_modes[mbmi->mode - INTER_MODE_START] >= 1) ||
          (mbmi->mode == NEARESTMV &&
           num_tx_search_modes[mbmi->mode - INTER_MODE_START] >= 2))
        continue;
    }

    txfm_info->skip_txfm = 0;
    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Select prediction reference frames.
    const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME;
    for (int i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i];
      if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i];
    }

    // Build the prediction for this mode
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                  av1_num_planes(cm) - 1);
    if (mbmi->motion_mode == OBMC_CAUSAL) {
      av1_build_obmc_inter_predictors_sb(cm, xd);
    }

    // Initialize RD stats
    RD_STATS rd_stats;
    RD_STATS rd_stats_y;
    RD_STATS rd_stats_uv;
    const int mode_rate = inter_modes_info->mode_rate_arr[data_idx];
    int64_t skip_rd = INT64_MAX;
    if (cpi->sf.inter_sf.txfm_rd_gate_level) {
      // Check if the mode is good enough based on skip RD
      int64_t curr_sse = inter_modes_info->sse_arr[data_idx];
      skip_rd = RDCOST(x->rdmult, mode_rate, curr_sse);
      int eval_txfm =
          check_txfm_eval(x, bsize, search_state->best_skip_rd[0], skip_rd,
                          cpi->sf.inter_sf.txfm_rd_gate_level, 0);
      if (!eval_txfm) continue;
    }

    num_tx_cands++;
    if (have_newmv_in_inter_mode(mbmi->mode)) newmv_mode_evaled = 1;
    num_tx_search_modes[mbmi->mode - INTER_MODE_START]++;
    int64_t this_yrd = INT64_MAX;
    // Do the transform search
    if (!av1_txfm_search(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv,
                         mode_rate, search_state->best_rd)) {
      continue;
    } else {
      // Luma-only RD cost, including the skip-txfm flag signaling cost.
      const int y_rate =
          rd_stats.skip_txfm
              ? mode_costs->skip_txfm_cost[skip_ctx][1]
              : (rd_stats_y.rate + mode_costs->skip_txfm_cost[skip_ctx][0]);
      this_yrd = RDCOST(x->rdmult, y_rate + mode_rate, rd_stats_y.dist);

      if (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1) {
        inter_mode_data_push(
            tile_data, mbmi->bsize, rd_stats.sse, rd_stats.dist,
            rd_stats_y.rate + rd_stats_uv.rate +
                mode_costs->skip_txfm_cost[skip_ctx][mbmi->skip_txfm]);
      }
    }
    rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist);
    if (rd_stats.rdcost < best_rd_in_this_partition) {
      best_rd_in_this_partition = rd_stats.rdcost;
      *yrd = this_yrd;
    }

    const THR_MODES mode_enum = get_prediction_mode_idx(
        mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);

    // Collect mode stats for multiwinner mode processing
    const int txfm_search_done = 1;
    store_winner_mode_stats(
        &cpi->common, x, mbmi, &rd_stats, &rd_stats_y, &rd_stats_uv, mode_enum,
        NULL, bsize, rd_stats.rdcost,
        cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);

    if (rd_stats.rdcost < search_state->best_rd) {
      update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                          &rd_stats_uv, mode_enum, x, txfm_search_done);
      search_state->best_skip_rd[0] = skip_rd;
      // Limit the total number of modes to be evaluated if the first is valid
      // and transform skip or compound
      if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
        if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
          // Evaluate more candidates at high quantizers where occurrence of
          // transform skip is high.
          const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
          const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands =
              AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
        } else if (!j && has_second_ref(&search_state->best_mbmode)) {
          const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
          // Evaluate more candidates at low quantizers where occurrence of
          // single reference mode is high.
          const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
                                                { 10, 7, 5, 3 } };
          const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
          num_inter_mode_cands = AOMMIN(
              max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
        }
      }
    }
    // If the number of candidates evaluated exceeds max_allowed_cands, break if
    // a newmv mode was evaluated already.
    if ((num_tx_cands > max_allowed_cands) && newmv_mode_evaled) break;
  }
}
4978 
// Indicates number of winner simple translation modes to be used, indexed by
// the motion_mode_for_winner_cand speed feature level (level 0 keeps none).
static const unsigned int num_winner_motion_modes[3] = { 0, 10, 3 };
4981 
// Adds a motion mode to the candidate list for motion_mode_for_winner_cand
// speed feature. This list consists of modes that have only searched
// SIMPLE_TRANSLATION. The final list will be used to search other motion
// modes after the initial RD search.
// The list is kept sorted in ascending rd_cost and is capped at
// max_winner_motion_mode_cand entries; the worst entry is dropped when the
// list is full.
static void handle_winner_cand(
    MB_MODE_INFO *const mbmi,
    motion_mode_best_st_candidate *best_motion_mode_cands,
    int max_winner_motion_mode_cand, int64_t this_rd,
    motion_mode_candidate *motion_mode_cand, int skip_motion_mode) {
  // Number of current motion mode candidates in list
  const int num_motion_mode_cand = best_motion_mode_cands->num_motion_mode_cand;
  int valid_motion_mode_cand_loc = num_motion_mode_cand;

  // find the best location to insert new motion mode candidate
  for (int j = 0; j < num_motion_mode_cand; j++) {
    if (this_rd < best_motion_mode_cands->motion_mode_cand[j].rd_cost) {
      valid_motion_mode_cand_loc = j;
      break;
    }
  }

  // Insert motion mode if location is found
  if (valid_motion_mode_cand_loc < max_winner_motion_mode_cand) {
    // Shift worse candidates one slot down to make room; entries past the
    // list capacity fall off the end.
    if (num_motion_mode_cand > 0 &&
        valid_motion_mode_cand_loc < max_winner_motion_mode_cand - 1)
      memmove(
          &best_motion_mode_cands
               ->motion_mode_cand[valid_motion_mode_cand_loc + 1],
          &best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc],
          (AOMMIN(num_motion_mode_cand, max_winner_motion_mode_cand - 1) -
           valid_motion_mode_cand_loc) *
              sizeof(best_motion_mode_cands->motion_mode_cand[0]));
    motion_mode_cand->mbmi = *mbmi;
    motion_mode_cand->rd_cost = this_rd;
    motion_mode_cand->skip_motion_mode = skip_motion_mode;
    best_motion_mode_cands->motion_mode_cand[valid_motion_mode_cand_loc] =
        *motion_mode_cand;
    best_motion_mode_cands->num_motion_mode_cand =
        AOMMIN(max_winner_motion_mode_cand,
               best_motion_mode_cands->num_motion_mode_cand + 1);
  }
}
5024 
5025 /*!\brief Search intra modes in interframes
5026  *
5027  * \ingroup intra_mode_search
5028  *
5029  * This function searches for the best intra mode when the current frame is an
5030  * interframe. This function however does *not* handle luma palette mode.
5031  * Palette mode is currently handled by \ref av1_search_palette_mode.
5032  *
5033  * This function will first iterate through the luma mode candidates to find the
5034  * best luma intra mode. Once the best luma mode it's found, it will then search
5035  * for the best chroma mode. Because palette mode is currently not handled by
5036  * here, a cache of uv mode is stored in
5037  * InterModeSearchState::intra_search_state so it can be reused later by \ref
5038  * av1_search_palette_mode.
5039  *
 * \remark This function returns nothing. Instead, the corresponding values in
 * x->e_mbd.mi[0], rd_stats, rd_stats_y/uv, and best_intra_rd are updated.
 * Moreover, in the first invocation of the function, the chroma intra mode
 * result is cached in intra_search_state so it can be reused in subsequent
 * calls. In the first evaluation with directional mode, a prune_mask computed
 * with histogram of gradient is also stored in intra_search_state.
5047  *
5048  * \param[in,out] search_state      Struct keep track of the prediction mode
5049  *                                  search state in interframe.
5050  *
5051  * \param[in]     cpi               Top-level encoder structure.
5052  * \param[in]     x                 Pointer to struct holding all the data for
5053  *                                  the current prediction block.
5054  * \param[out]    rd_cost           Stores the best rd_cost among all the
5055  *                                  prediction modes searched.
5056  * \param[in]     bsize             Current block size.
5057  * \param[in,out] ctx               Structure to hold the number of 4x4 blks to
5058  *                                  copy the tx_type and txfm_skip arrays.
5059  *                                  for only the Y plane.
5060  * \param[in,out] sf_args           Stores the list of intra mode candidates
5061  *                                  to be searched.
5062  * \param[in]     intra_ref_frame_cost  The entropy cost for signaling that the
5063  *                                      current ref frame is an intra frame.
5064  * \param[in]     yrd_threshold     The rdcost threshold for luma intra mode to
5065  *                                  terminate chroma intra mode search.
5066  *
 * \remark Nothing is returned. The best mode results are stored in
 * search_state and rd_cost, and the coding context in ctx is updated when a
 * new best mode is found.
5070  */
static AOM_INLINE void search_intra_modes_in_interframe(
    InterModeSearchState *search_state, const AV1_COMP *cpi, MACROBLOCK *x,
    RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
    InterModeSFArgs *sf_args, unsigned int intra_ref_frame_cost,
    int64_t yrd_threshold) {
  const AV1_COMMON *const cm = &cpi->common;
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  IntraModeSearchState *intra_search_state = &search_state->intra_search_state;

  int is_best_y_mode_intra = 0;
  RD_STATS best_intra_rd_stats_y;
  int64_t best_rd_y = INT64_MAX;
  int best_mode_cost_y = -1;
  MB_MODE_INFO best_mbmi = *xd->mi[0];
  THR_MODES best_mode_enum = THR_INVALID;
  uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
  uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
  const int num_4x4 = bsize_to_num_blk(bsize);

  // Performs luma search
  int64_t best_model_rd = INT64_MAX;
  int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
  for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
    top_intra_model_rd[i] = INT64_MAX;
  }
  // Iterate over the luma mode candidates (each mode_idx encodes a mode plus
  // a delta angle for directional modes).
  for (int mode_idx = 0; mode_idx < LUMA_MODE_COUNT; ++mode_idx) {
    // skip_intra_modes may be set mid-search (e.g. by the ML pruning model).
    if (sf->intra_sf.skip_intra_in_interframe &&
        search_state->intra_search_state.skip_intra_modes)
      break;
    set_y_mode_and_delta_angle(mode_idx, mbmi);
    assert(mbmi->mode < INTRA_MODE_END);

    // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
    if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
      continue;

    const THR_MODES mode_enum =
        get_prediction_mode_idx(mbmi->mode, INTRA_FRAME, NONE_FRAME);
    // Skip smooth modes when they are disabled by configuration or by speed
    // feature.
    if ((!cpi->oxcf.intra_mode_cfg.enable_smooth_intra ||
         cpi->sf.intra_sf.disable_smooth_intra) &&
        (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
         mbmi->mode == SMOOTH_V_PRED))
      continue;
    if (!cpi->oxcf.intra_mode_cfg.enable_paeth_intra &&
        mbmi->mode == PAETH_PRED)
      continue;
    // Non-zero delta angles are only valid when the block size allows them.
    if (av1_is_directional_mode(mbmi->mode) &&
        av1_use_angle_delta(bsize) == 0 && mbmi->angle_delta[PLANE_TYPE_Y] != 0)
      continue;
    const PREDICTION_MODE this_mode = mbmi->mode;

    assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
    assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
    init_mbmi(mbmi, this_mode, av1_mode_defs[mode_enum].ref_frame, cm);
    x->txfm_search_info.skip_txfm = 0;

    if (this_mode != DC_PRED) {
      // Only search the oblique modes if the best so far is
      // one of the neighboring directional modes
      if ((sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) &&
          (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) {
        if (search_state->best_mode_index != THR_INVALID &&
            search_state->best_mbmode.ref_frame[0] > INTRA_FRAME)
          continue;
      }
      if (sf->rt_sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) {
        if (conditional_skipintra(
                this_mode, search_state->intra_search_state.best_intra_mode))
          continue;
      }
    }

    RD_STATS intra_rd_stats_y;
    int mode_cost_y;
    int64_t intra_rd_y = INT64_MAX;
    const int is_luma_result_valid = av1_handle_intra_y_mode(
        intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
        &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
        &best_model_rd, top_intra_model_rd);
    // Only consider modes whose luma RD beats the caller-supplied threshold.
    if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
      is_best_y_mode_intra = 1;
      if (intra_rd_y < best_rd_y) {
        best_intra_rd_stats_y = intra_rd_stats_y;
        best_mode_cost_y = mode_cost_y;
        best_rd_y = intra_rd_y;
        best_mbmi = *mbmi;
        best_mode_enum = mode_enum;
        memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
               sizeof(best_blk_skip[0]) * num_4x4);
        av1_copy_array(best_tx_type_map, xd->tx_type_map, num_4x4);
      }
    }
  }

  // No luma intra mode beat the threshold; nothing more to do.
  if (!is_best_y_mode_intra) {
    return;
  }

  assert(best_rd_y < INT64_MAX);

  // Restores the best luma mode
  *mbmi = best_mbmi;
  memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
         sizeof(best_blk_skip[0]) * num_4x4);
  av1_copy_array(xd->tx_type_map, best_tx_type_map, num_4x4);

  // Performs chroma search
  RD_STATS intra_rd_stats, intra_rd_stats_uv;
  av1_init_rd_stats(&intra_rd_stats);
  av1_init_rd_stats(&intra_rd_stats_uv);
  const int num_planes = av1_num_planes(cm);
  if (num_planes > 1) {
    const int intra_uv_mode_valid = av1_search_intra_uv_modes_in_interframe(
        intra_search_state, cpi, x, bsize, &intra_rd_stats,
        &best_intra_rd_stats_y, &intra_rd_stats_uv, search_state->best_rd);

    if (!intra_uv_mode_valid) {
      return;
    }
  }

  // Merge the luma and chroma rd stats
  assert(best_mode_cost_y >= 0);
  intra_rd_stats.rate = best_intra_rd_stats_y.rate + best_mode_cost_y;
  if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) {
    // av1_pick_uniform_tx_size_type_yrd above includes the cost of the tx_size
    // in the tokenonly rate, but for intra blocks, tx_size is always coded
    // (prediction granularity), so we account for it in the full rate,
    // not the tokenonly rate.
    best_intra_rd_stats_y.rate -= tx_size_cost(x, bsize, mbmi->tx_size);
  }

  const ModeCosts *mode_costs = &x->mode_costs;
  const PREDICTION_MODE mode = mbmi->mode;
  if (num_planes > 1 && xd->is_chroma_ref) {
    const int uv_mode_cost =
        mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mode][mbmi->uv_mode];
    intra_rd_stats.rate +=
        intra_rd_stats_uv.rate +
        intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
  }

  // Intra block is always coded as non-skip
  intra_rd_stats.skip_txfm = 0;
  intra_rd_stats.dist = best_intra_rd_stats_y.dist + intra_rd_stats_uv.dist;
  // Add in the cost of the no skip flag.
  const int skip_ctx = av1_get_skip_txfm_context(xd);
  intra_rd_stats.rate += mode_costs->skip_txfm_cost[skip_ctx][0];
  // Calculate the final RD estimate for this mode.
  const int64_t this_rd =
      RDCOST(x->rdmult, intra_rd_stats.rate, intra_rd_stats.dist);
  // Keep record of best intra rd
  if (this_rd < search_state->best_intra_rd) {
    search_state->best_intra_rd = this_rd;
    intra_search_state->best_intra_mode = mode;
  }

  for (int i = 0; i < REFERENCE_MODES; ++i) {
    search_state->best_pred_rd[i] =
        AOMMIN(search_state->best_pred_rd[i], this_rd);
  }

  intra_rd_stats.rdcost = this_rd;

  // Collect mode stats for multiwinner mode processing
  const int txfm_search_done = 1;
  store_winner_mode_stats(
      &cpi->common, x, mbmi, &intra_rd_stats, &best_intra_rd_stats_y,
      &intra_rd_stats_uv, best_mode_enum, NULL, bsize, intra_rd_stats.rdcost,
      cpi->sf.winner_mode_sf.multi_winner_mode_type, txfm_search_done);
  if (intra_rd_stats.rdcost < search_state->best_rd) {
    update_search_state(search_state, rd_cost, ctx, &intra_rd_stats,
                        &best_intra_rd_stats_y, &intra_rd_stats_uv,
                        best_mode_enum, x, txfm_search_done);
  }
}
5249 
5250 #if !CONFIG_REALTIME_ONLY
5251 // Prepare inter_cost and intra_cost from TPL stats, which are used as ML
5252 // features in intra mode pruning.
calculate_cost_from_tpl_data(const AV1_COMP * cpi,MACROBLOCK * x,BLOCK_SIZE bsize,int mi_row,int mi_col,int64_t * inter_cost,int64_t * intra_cost)5253 static AOM_INLINE void calculate_cost_from_tpl_data(
5254     const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
5255     int mi_col, int64_t *inter_cost, int64_t *intra_cost) {
5256   const AV1_COMMON *const cm = &cpi->common;
5257   // Only consider full SB.
5258   const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
5259   const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
5260   const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
5261                   (block_size_high[sb_size] / tpl_bsize_1d);
5262   SuperBlockEnc *sb_enc = &x->sb_enc;
5263   if (sb_enc->tpl_data_count == len) {
5264     const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
5265     const int tpl_stride = sb_enc->tpl_stride;
5266     const int tplw = mi_size_wide[tpl_bsize];
5267     const int tplh = mi_size_high[tpl_bsize];
5268     const int nw = mi_size_wide[bsize] / tplw;
5269     const int nh = mi_size_high[bsize] / tplh;
5270     if (nw >= 1 && nh >= 1) {
5271       const int of_h = mi_row % mi_size_high[sb_size];
5272       const int of_w = mi_col % mi_size_wide[sb_size];
5273       const int start = of_h / tplh * tpl_stride + of_w / tplw;
5274 
5275       for (int k = 0; k < nh; k++) {
5276         for (int l = 0; l < nw; l++) {
5277           *inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
5278           *intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
5279         }
5280       }
5281       *inter_cost /= nw * nh;
5282       *intra_cost /= nw * nh;
5283     }
5284   }
5285 }
5286 #endif  // !CONFIG_REALTIME_ONLY
5287 
5288 // When the speed feature skip_intra_in_interframe > 0, enable ML model to prune
5289 // intra mode search.
skip_intra_modes_in_interframe(AV1_COMMON * const cm,struct macroblock * x,BLOCK_SIZE bsize,InterModeSearchState * search_state,int64_t inter_cost,int64_t intra_cost,int skip_intra_in_interframe)5290 static AOM_INLINE void skip_intra_modes_in_interframe(
5291     AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize,
5292     InterModeSearchState *search_state, int64_t inter_cost, int64_t intra_cost,
5293     int skip_intra_in_interframe) {
5294   MACROBLOCKD *const xd = &x->e_mbd;
5295   // Prune intra search based on best inter mode being transfrom skip.
5296   if ((skip_intra_in_interframe >= 2) && search_state->best_mbmode.skip_txfm) {
5297     const int qindex_thresh[2] = { 200, MAXQ };
5298     const int ind = (skip_intra_in_interframe >= 3) ? 1 : 0;
5299     if (!have_newmv_in_inter_mode(search_state->best_mbmode.mode) &&
5300         (x->qindex <= qindex_thresh[ind])) {
5301       search_state->intra_search_state.skip_intra_modes = 1;
5302       return;
5303     } else if ((skip_intra_in_interframe >= 4) &&
5304                (inter_cost < 0 || intra_cost < 0)) {
5305       search_state->intra_search_state.skip_intra_modes = 1;
5306       return;
5307     }
5308   }
5309   // Use ML model to prune intra search.
5310   if (inter_cost >= 0 && intra_cost >= 0) {
5311     const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
5312                                      ? &av1_intrap_nn_config
5313                                      : &av1_intrap_hd_nn_config;
5314     float nn_features[6];
5315     float scores[2] = { 0.0f };
5316 
5317     nn_features[0] = (float)search_state->best_mbmode.skip_txfm;
5318     nn_features[1] = (float)mi_size_wide_log2[bsize];
5319     nn_features[2] = (float)mi_size_high_log2[bsize];
5320     nn_features[3] = (float)intra_cost;
5321     nn_features[4] = (float)inter_cost;
5322     const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
5323     const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
5324     nn_features[5] = (float)(ac_q_max / ac_q);
5325 
5326     av1_nn_predict(nn_features, nn_config, 1, scores);
5327 
5328     // For two parameters, the max prob returned from av1_nn_softmax equals
5329     // 1.0 / (1.0 + e^(-|diff_score|)). Here use scores directly to avoid the
5330     // calling of av1_nn_softmax.
5331     const float thresh[5] = { 1.4f, 1.4f, 1.4f, 1.4f, 1.4f };
5332     assert(skip_intra_in_interframe <= 5);
5333     if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) {
5334       search_state->intra_search_state.skip_intra_modes = 1;
5335     }
5336   }
5337 }
5338 
5339 // TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb.
/*!\brief Top-level RD mode decision for a block in an inter frame.
 *
 * Iterates over the inter-mode table (av1_default_mode_order) between
 * mode_start and mode_end, prunes candidates via speed features
 * (skip_inter_mode()), and evaluates survivors with handle_inter_mode().
 * Afterwards it optionally re-evaluates motion modes for winner candidates,
 * performs a deferred transform search, searches intra/palette modes, tries
 * skip_mode, and refines the winner's transform decision.
 *
 * \param[in]  cpi            Encoder context.
 * \param[in]  tile_data      Per-tile data (inter-mode RD models).
 * \param[in]  x              Macroblock search context; best mode is written
 *                            back to x->e_mbd.mi[0].
 * \param[out] rd_cost        RD stats of the chosen mode (rate set to INT_MAX
 *                            if nothing beats best_rd_so_far).
 * \param[in]  bsize          Block size being searched.
 * \param[in,out] ctx         Pick-mode context receiving blk_skip/tx_type_map.
 * \param[in]  best_rd_so_far RD bound from previously searched partitions.
 */
void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
                            struct macroblock *x, struct RD_STATS *rd_cost,
                            BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
                            int64_t best_rd_so_far) {
  AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  const int num_planes = av1_num_planes(cm);
  const SPEED_FEATURES *const sf = &cpi->sf;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  TxfmSearchInfo *txfm_info = &x->txfm_search_info;
  int i;
  const ModeCosts *mode_costs = &x->mode_costs;
  const int *comp_inter_cost =
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];

  InterModeSearchState search_state;
  init_inter_mode_search_state(&search_state, cpi, x, bsize, best_rd_so_far);
  // Per-ref-frame best interintra mode; INTERINTRA_MODES means "none yet".
  INTERINTRA_MODE interintra_modes[REF_FRAMES] = {
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES,
    INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES
  };
  HandleInterModeArgs args = { { NULL },
                               { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE },
                               { NULL },
                               { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1,
                                 MAX_SB_SIZE >> 1 },
                               NULL,
                               NULL,
                               NULL,
                               search_state.modelled_rd,
                               INT_MAX,
                               INT_MAX,
                               search_state.simple_rd,
                               0,
                               interintra_modes,
                               { { { 0 }, { { 0 } }, { 0 }, 0, 0, 0, 0 } },
                               0,
                               -1,
                               -1,
                               -1,
                               { 0 },
                               { 0 } };
  for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
  // Indicates the appropriate number of simple translation winner modes for
  // exhaustive motion mode evaluation
  const int max_winner_motion_mode_cand =
      num_winner_motion_modes[cpi->sf.winner_mode_sf
                                  .motion_mode_for_winner_cand];
  assert(max_winner_motion_mode_cand <= MAX_WINNER_MOTION_MODES);
  // Scratch candidate filled by handle_inter_mode() for each evaluated mode;
  // competitive entries are copied into best_motion_mode_cands below.
  motion_mode_candidate motion_mode_cand;
  motion_mode_best_st_candidate best_motion_mode_cands;
  // Initializing the number of motion mode candidates to zero.
  best_motion_mode_cands.num_motion_mode_cand = 0;
  for (i = 0; i < MAX_WINNER_MOTION_MODES; ++i)
    best_motion_mode_cands.motion_mode_cand[i].rd_cost = INT64_MAX;

  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;

  av1_invalid_rd_stats(rd_cost);

  // Invalidate cached warp samples for every reference frame.
  for (i = 0; i < REF_FRAMES; ++i) {
    x->warp_sample_info[i].num = -1;
  }

  // Ref frames that are selected by square partition blocks.
  int picked_ref_frames_mask = 0;
  if (cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions &&
      mbmi->partition != PARTITION_NONE) {
    // prune_ref_frame_for_rect_partitions = 1 implies prune only extended
    // partition blocks. prune_ref_frame_for_rect_partitions >=2
    // implies prune for vert, horiz and extended partition blocks.
    if ((mbmi->partition != PARTITION_VERT &&
         mbmi->partition != PARTITION_HORZ) ||
        cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
      picked_ref_frames_mask =
          fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size);
    }
  }

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, set_params_rd_pick_inter_mode_time);
#endif
  // Skip ref frames that never selected by square blocks.
  const int skip_ref_frame_mask =
      picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
  mode_skip_mask_t mode_skip_mask;
  unsigned int ref_costs_single[REF_FRAMES];
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
  struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE];
  // init params, set frame modes, speed features
  set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
                                skip_ref_frame_mask, ref_costs_single,
                                ref_costs_comp, yv12_mb);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, set_params_rd_pick_inter_mode_time);
#endif

  // Best estimated RD seen so far; updated by handle_inter_mode() when the
  // transform search is deferred (do_tx_search == 0).
  int64_t best_est_rd = INT64_MAX;
  const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
  // If do_tx_search is 0, only estimated RD should be computed.
  // If do_tx_search is 1, all modes have TX search performed.
  const int do_tx_search =
      !((cpi->sf.inter_sf.inter_mode_rd_model_estimation == 1 && md->ready) ||
        (cpi->sf.inter_sf.inter_mode_rd_model_estimation == 2 &&
         num_pels_log2_lookup[bsize] > 8) ||
        cpi->sf.rt_sf.force_tx_search_off);
  InterModesInfo *inter_modes_info = x->inter_modes_info;
  inter_modes_info->num = 0;

  // Temporary buffers used by handle_inter_mode().
  uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);

  // The best RD found for the reference frame, among single reference modes.
  // Note that the 0-th element will contain a cut-off that is later used
  // to determine if we should skip a compound mode.
  int64_t ref_frame_rd[REF_FRAMES] = { INT64_MAX, INT64_MAX, INT64_MAX,
                                       INT64_MAX, INT64_MAX, INT64_MAX,
                                       INT64_MAX, INT64_MAX };

  // Prepared stats used later to check if we could skip intra mode eval.
  int64_t inter_cost = -1;
  int64_t intra_cost = -1;
  // Need to tweak the threshold for hdres speed 0 & 1.
  const int mi_row = xd->mi_row;
  const int mi_col = xd->mi_col;

  // Obtain the relevant tpl stats for pruning inter modes
  // NOTE(review): only populated under !CONFIG_REALTIME_ONLY, but passed to
  // handle_inter_mode() unconditionally below.
  PruneInfoFromTpl inter_cost_info_from_tpl;
#if !CONFIG_REALTIME_ONLY
  if (cpi->sf.inter_sf.prune_inter_modes_based_on_tpl) {
    // x->tpl_keep_ref_frame[id] = 1 => no pruning in
    // prune_ref_by_selective_ref_frame()
    // x->tpl_keep_ref_frame[id] = 0  => ref frame can be pruned in
    // prune_ref_by_selective_ref_frame()
    // Populating valid_refs[idx] = 1 ensures that
    // 'inter_cost_info_from_tpl.best_inter_cost' does not correspond to a
    // pruned ref frame.
    int valid_refs[INTER_REFS_PER_FRAME];
    for (MV_REFERENCE_FRAME frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
      const MV_REFERENCE_FRAME refs[2] = { frame, NONE_FRAME };
      valid_refs[frame - 1] =
          x->tpl_keep_ref_frame[frame] ||
          !prune_ref_by_selective_ref_frame(
              cpi, x, refs, cm->cur_frame->ref_display_order_hint);
    }
    av1_zero(inter_cost_info_from_tpl);
    get_block_level_tpl_stats(cpi, bsize, mi_row, mi_col, valid_refs,
                              &inter_cost_info_from_tpl);
  }

  // Skip the TPL-based inter/intra cost estimate for large-resolution,
  // low-speed encodes.
  const int do_pruning =
      (AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
  if (do_pruning && sf->intra_sf.skip_intra_in_interframe &&
      cpi->oxcf.algo_cfg.enable_tpl_model)
    calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost,
                                 &intra_cost);
#endif  // !CONFIG_REALTIME_ONLY

  // Initialize best mode stats for winner mode processing
  zero_winner_mode_stats(bsize, MAX_WINNER_MODE_COUNT_INTER,
                         x->winner_mode_stats);
  x->winner_mode_count = 0;
  store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
                          NULL, bsize, best_rd_so_far,
                          cpi->sf.winner_mode_sf.multi_winner_mode_type, 0);

  int mode_thresh_mul_fact = (1 << MODE_THRESH_QBITS);
  if (sf->inter_sf.prune_inter_modes_if_skippable) {
    // Higher multiplication factor values for lower quantizers.
    mode_thresh_mul_fact = mode_threshold_mul_factor[x->qindex];
  }

  // Initialize arguments for mode loop speed features
  InterModeSFArgs sf_args = { &args.skip_motion_mode,
                              &mode_skip_mask,
                              &search_state,
                              skip_ref_frame_mask,
                              0,
                              mode_thresh_mul_fact,
                              0,
                              0 };
  // Best luma-only RD among inter modes; handed to the intra search below.
  int64_t best_inter_yrd = INT64_MAX;

  // This is the main loop of this function. It loops over all possible inter
  // modes and calls handle_inter_mode() to compute the RD for each.
  // Here midx is just an iterator index that should not be used by itself
  // except to keep track of the number of modes searched. It should be used
  // with av1_default_mode_order to get the enum that defines the mode, which
  // can be used with av1_mode_defs to get the prediction mode and the ref
  // frames.
  // TODO(yunqing, any): Setting mode_start and mode_end outside for-loop brings
  // good speedup for real time case. If we decide to use compound mode in real
  // time, maybe we can modify av1_default_mode_order table.
  THR_MODES mode_start = THR_INTER_MODE_START;
  THR_MODES mode_end = THR_INTER_MODE_END;
  const CurrentFrame *const current_frame = &cm->current_frame;
  if (current_frame->reference_mode == SINGLE_REFERENCE) {
    mode_start = SINGLE_REF_MODE_START;
    mode_end = SINGLE_REF_MODE_END;
  }

  for (THR_MODES midx = mode_start; midx < mode_end; ++midx) {
    // Get the actual prediction mode we are trying in this iteration
    const THR_MODES mode_enum = av1_default_mode_order[midx];
    const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
    const PREDICTION_MODE this_mode = mode_def->mode;
    const MV_REFERENCE_FRAME *ref_frames = mode_def->ref_frame;

    const MV_REFERENCE_FRAME ref_frame = ref_frames[0];
    const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
    const int is_single_pred =
        ref_frame > INTRA_FRAME && second_ref_frame == NONE_FRAME;
    const int comp_pred = second_ref_frame > INTRA_FRAME;

    init_mbmi(mbmi, this_mode, ref_frames, cm);

    txfm_info->skip_txfm = 0;
    sf_args.num_single_modes_processed += is_single_pred;
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, skip_inter_mode_time);
#endif
    // Apply speed features to decide if this inter mode can be skipped
    const int is_skip_inter_mode =
        skip_inter_mode(cpi, x, bsize, ref_frame_rd, midx, &sf_args);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, skip_inter_mode_time);
#endif
    if (is_skip_inter_mode) continue;

    // Select prediction reference frames.
    for (i = 0; i < num_planes; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
      if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
    }

    // Reset per-mode state that previous iterations may have modified.
    mbmi->angle_delta[PLANE_TYPE_Y] = 0;
    mbmi->angle_delta[PLANE_TYPE_UV] = 0;
    mbmi->filter_intra_mode_info.use_filter_intra = 0;
    mbmi->ref_mv_idx = 0;

    const int64_t ref_best_rd = search_state.best_rd;
    RD_STATS rd_stats, rd_stats_y, rd_stats_uv;
    av1_init_rd_stats(&rd_stats);

    const int ref_frame_cost = comp_pred
                                   ? ref_costs_comp[ref_frame][second_ref_frame]
                                   : ref_costs_single[ref_frame];
    const int compmode_cost =
        is_comp_ref_allowed(mbmi->bsize) ? comp_inter_cost[comp_pred] : 0;
    // The compound-mode signaling cost only applies under
    // REFERENCE_MODE_SELECT.
    const int real_compmode_cost =
        cm->current_frame.reference_mode == REFERENCE_MODE_SELECT
            ? compmode_cost
            : 0;
    // Point to variables that are maintained between loop iterations
    args.single_newmv = search_state.single_newmv;
    args.single_newmv_rate = search_state.single_newmv_rate;
    args.single_newmv_valid = search_state.single_newmv_valid;
    args.single_comp_cost = real_compmode_cost;
    args.ref_frame_cost = ref_frame_cost;

    int64_t skip_rd[2] = { search_state.best_skip_rd[0],
                           search_state.best_skip_rd[1] };
    int64_t this_yrd = INT64_MAX;
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, handle_inter_mode_time);
#endif
    int64_t this_rd = handle_inter_mode(
        cpi, tile_data, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &args,
        ref_best_rd, tmp_buf, &x->comp_rd_buffer, &best_est_rd, do_tx_search,
        inter_modes_info, &motion_mode_cand, skip_rd, &inter_cost_info_from_tpl,
        &this_yrd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, handle_inter_mode_time);
#endif
    // Record single-ref results used later to prune compound modes.
    if (sf->inter_sf.prune_comp_search_by_single_result > 0 &&
        is_inter_singleref_mode(this_mode)) {
      collect_single_states(x, &search_state, mbmi);
    }

    if (sf->inter_sf.prune_comp_using_best_single_mode_ref > 0 &&
        is_inter_singleref_mode(this_mode))
      update_best_single_mode(&search_state, this_mode, ref_frame, this_rd);

    if (this_rd == INT64_MAX) continue;

    // Zero out coefficient rates for skip_txfm blocks.
    if (mbmi->skip_txfm) {
      rd_stats_y.rate = 0;
      rd_stats_uv.rate = 0;
    }

    if (sf->inter_sf.prune_compound_using_single_ref && is_single_pred &&
        this_rd < ref_frame_rd[ref_frame]) {
      ref_frame_rd[ref_frame] = this_rd;
    }

    // Did this mode help, i.e., is it the new best mode
    if (this_rd < search_state.best_rd) {
      assert(IMPLIES(comp_pred,
                     cm->current_frame.reference_mode != SINGLE_REFERENCE));
      search_state.best_pred_sse = x->pred_sse[ref_frame];
      best_inter_yrd = this_yrd;
      update_search_state(&search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
                          &rd_stats_uv, mode_enum, x, do_tx_search);
      if (do_tx_search) search_state.best_skip_rd[0] = skip_rd[0];
      search_state.best_skip_rd[1] = skip_rd[1];
    }
    if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
      // Add this mode to motion mode candidate list for motion mode search
      // if using motion_mode_for_winner_cand speed feature
      handle_winner_cand(mbmi, &best_motion_mode_cands,
                         max_winner_motion_mode_cand, this_rd,
                         &motion_mode_cand, args.skip_motion_mode);
    }

    /* keep record of best compound/single-only prediction */
    record_best_compound(cm->current_frame.reference_mode, &rd_stats, comp_pred,
                         x->rdmult, &search_state, compmode_cost);
  }

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
#endif
  if (cpi->sf.winner_mode_sf.motion_mode_for_winner_cand) {
    // For the single ref winner candidates, evaluate other motion modes (non
    // simple translation).
    evaluate_motion_mode_for_winner_candidates(
        cpi, x, rd_cost, &args, tile_data, ctx, yv12_mb,
        &best_motion_mode_cands, do_tx_search, bsize, &best_est_rd,
        &search_state, &best_inter_yrd);
  }
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, evaluate_motion_mode_for_winner_candidates_time);
#endif

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, do_tx_search_time);
#endif
  if (do_tx_search != 1) {
    // A full tx search has not yet been done, do tx search for
    // top mode candidates
    tx_search_best_inter_candidates(cpi, tile_data, x, best_rd_so_far, bsize,
                                    yv12_mb, mi_row, mi_col, &search_state,
                                    rd_cost, ctx, &best_inter_yrd);
  }
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, do_tx_search_time);
#endif

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, handle_intra_mode_time);
#endif
  // Gate intra mode evaluation if best of inter is skip except when source
  // variance is extremely low
  const unsigned int src_var_thresh_intra_skip = 1;
  const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe;
  if (skip_intra_in_interframe &&
      (x->source_variance > src_var_thresh_intra_skip))
    skip_intra_modes_in_interframe(cm, x, bsize, &search_state, inter_cost,
                                   intra_cost, skip_intra_in_interframe);

  const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
  search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
                                   &sf_args, intra_ref_frame_cost,
                                   best_inter_yrd);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, handle_intra_mode_time);
#endif

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, refine_winner_mode_tx_time);
#endif
  int winner_mode_count =
      cpi->sf.winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
  // In effect only when fast tx search speed features are enabled.
  refine_winner_mode_tx(
      cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
      &search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
      search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, refine_winner_mode_tx_time);
#endif

  // Initialize default mode evaluation params
  set_mode_eval_params(cpi, x, DEFAULT_EVAL);

  // Only try palette mode when the best mode so far is an intra mode.
  const int try_palette =
      cpi->oxcf.tool_cfg.enable_palette &&
      av1_allow_palette(features->allow_screen_content_tools, mbmi->bsize) &&
      !is_inter_mode(search_state.best_mbmode.mode) && rd_cost->rate != INT_MAX;
  RD_STATS this_rd_cost;
  int this_skippable = 0;
  if (try_palette) {
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, av1_search_palette_mode_time);
#endif
    this_skippable = av1_search_palette_mode(
        &search_state.intra_search_state, cpi, x, bsize, intra_ref_frame_cost,
        ctx, &this_rd_cost, search_state.best_rd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, av1_search_palette_mode_time);
#endif
    // Adopt the palette mode if it beats the current best RD.
    if (this_rd_cost.rdcost < search_state.best_rd) {
      search_state.best_mode_index = THR_DC;
      mbmi->mv[0].as_int = 0;
      rd_cost->rate = this_rd_cost.rate;
      rd_cost->dist = this_rd_cost.dist;
      rd_cost->rdcost = this_rd_cost.rdcost;
      search_state.best_rd = rd_cost->rdcost;
      search_state.best_mbmode = *mbmi;
      search_state.best_skip2 = 0;
      search_state.best_mode_skippable = this_skippable;
      memcpy(ctx->blk_skip, txfm_info->blk_skip,
             sizeof(txfm_info->blk_skip[0]) * ctx->num_4x4_blk);
      av1_copy_array(ctx->tx_type_map, xd->tx_type_map, ctx->num_4x4_blk);
    }
  }

  // Try skip_mode as a final contender when allowed for this frame/block;
  // rd_pick_skip_mode() may update the best mode in search_state.
  search_state.best_mbmode.skip_mode = 0;
  if (cm->current_frame.skip_mode_info.skip_mode_flag &&
      is_comp_ref_allowed(bsize)) {
    const struct segmentation *const seg = &cm->seg;
    unsigned char segment_id = mbmi->segment_id;
    if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) {
      rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, yv12_mb);
    }
  }

  // Make sure that the ref_mv_idx is only nonzero when we're
  // using a mode which can support ref_mv_idx
  if (search_state.best_mbmode.ref_mv_idx != 0 &&
      !(search_state.best_mbmode.mode == NEWMV ||
        search_state.best_mbmode.mode == NEW_NEWMV ||
        have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) {
    search_state.best_mbmode.ref_mv_idx = 0;
  }

  // Nothing found that beats the incoming bound: signal failure to caller.
  if (search_state.best_mode_index == THR_INVALID ||
      search_state.best_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  const InterpFilter interp_filter = features->interp_filter;
  assert((interp_filter == SWITCHABLE) ||
         (interp_filter ==
          search_state.best_mbmode.interp_filters.as_filters.y_filter) ||
         !is_inter_block(&search_state.best_mbmode));
  assert((interp_filter == SWITCHABLE) ||
         (interp_filter ==
          search_state.best_mbmode.interp_filters.as_filters.x_filter) ||
         !is_inter_block(&search_state.best_mbmode));

  if (!cpi->rc.is_src_frame_alt_ref && cpi->sf.inter_sf.adaptive_rd_thresh) {
    av1_update_rd_thresh_fact(
        cm, x->thresh_freq_fact, sf->inter_sf.adaptive_rd_thresh, bsize,
        search_state.best_mode_index, mode_start, mode_end, THR_DC, MAX_MODES);
  }

  // macroblock modes
  *mbmi = search_state.best_mbmode;
  txfm_info->skip_txfm |= search_state.best_skip2;

  // Note: this section is needed since the mode may have been forced to
  // GLOBALMV by the all-zero mode handling of ref-mv.
  if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) {
    // Correct the interp filters for GLOBALMV
    if (is_nontrans_global_motion(xd, xd->mi[0])) {
      int_interpfilters filters =
          av1_broadcast_interp_filter(av1_unswitchable_filter(interp_filter));
      assert(mbmi->interp_filters.as_int == filters.as_int);
      (void)filters;
    }
  }

  // Record per-reference-mode RD differences relative to the overall best.
  for (i = 0; i < REFERENCE_MODES; ++i) {
    if (search_state.best_pred_rd[i] == INT64_MAX) {
      search_state.best_pred_diff[i] = INT_MIN;
    } else {
      search_state.best_pred_diff[i] =
          search_state.best_rd - search_state.best_pred_rd[i];
    }
  }

  txfm_info->skip_txfm |= search_state.best_mode_skippable;

  assert(search_state.best_mode_index != THR_INVALID);

#if CONFIG_INTERNAL_STATS
  store_coding_context(x, ctx, search_state.best_mode_index,
                       search_state.best_pred_diff,
                       search_state.best_mode_skippable);
#else
  store_coding_context(x, ctx, search_state.best_pred_diff,
                       search_state.best_mode_skippable);
#endif  // CONFIG_INTERNAL_STATS

  if (mbmi->palette_mode_info.palette_size[1] > 0) {
    assert(try_palette);
    av1_restore_uv_color_map(cpi, x);
  }
}
5845 
/*!\brief Mode decision for blocks in a segment with SEG_LVL_SKIP active.
 *
 * Such blocks are forced to GLOBALMV with zero residual, so no mode loop is
 * needed: the function sets up the forced mode, picks an interpolation
 * filter, and accounts only for the mode/ref/filter signaling cost.
 *
 * \param[in]  cpi            Encoder context.
 * \param[in]  tile_data      Unused.
 * \param[in]  x              Macroblock context; mbmi is written in place.
 * \param[in]  mi_row,mi_col  Block position in mi units (for global motion).
 * \param[out] rd_cost        Resulting RD stats (rate set to INT_MAX if the
 *                            forced mode does not beat best_rd_so_far).
 * \param[in]  bsize          Block size.
 * \param[in]  ctx            Pick-mode context for store_coding_context().
 * \param[in]  best_rd_so_far RD bound from previously searched options.
 */
void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
                                        TileDataEnc *tile_data, MACROBLOCK *x,
                                        int mi_row, int mi_col,
                                        RD_STATS *rd_cost, BLOCK_SIZE bsize,
                                        PICK_MODE_CONTEXT *ctx,
                                        int64_t best_rd_so_far) {
  const AV1_COMMON *const cm = &cpi->common;
  const FeatureFlags *const features = &cm->features;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *const mbmi = xd->mi[0];
  unsigned char segment_id = mbmi->segment_id;
  const int comp_pred = 0;
  int i;
  int64_t best_pred_diff[REFERENCE_MODES];
  unsigned int ref_costs_single[REF_FRAMES];
  unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES];
  const ModeCosts *mode_costs = &x->mode_costs;
  const int *comp_inter_cost =
      mode_costs->comp_inter_cost[av1_get_reference_mode_context(xd)];
  InterpFilter best_filter = SWITCHABLE;
  int64_t this_rd = INT64_MAX;
  int rate2 = 0;
  // Residual is forced to zero for SEG_LVL_SKIP blocks.
  const int64_t distortion2 = 0;
  (void)mi_row;
  (void)mi_col;
  (void)tile_data;

  av1_collect_neighbors_ref_counts(xd);

  estimate_ref_frame_costs(cm, xd, mode_costs, segment_id, ref_costs_single,
                           ref_costs_comp);

  for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX;
  for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX;

  rd_cost->rate = INT_MAX;

  assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP));

  // Force the block to GLOBALMV / simple translation with no palette or
  // filter-intra; ref frame comes from the segment when signaled there.
  mbmi->palette_mode_info.palette_size[0] = 0;
  mbmi->palette_mode_info.palette_size[1] = 0;
  mbmi->filter_intra_mode_info.use_filter_intra = 0;
  mbmi->mode = GLOBALMV;
  mbmi->motion_mode = SIMPLE_TRANSLATION;
  mbmi->uv_mode = UV_DC_PRED;
  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME))
    mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME);
  else
    mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE_FRAME;
  // Motion vector is derived from the global motion model of the ref frame.
  mbmi->mv[0].as_int =
      gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]],
                           features->allow_high_precision_mv, bsize, mi_col,
                           mi_row, features->cur_frame_force_integer_mv)
          .as_int;
  mbmi->tx_size = max_txsize_lookup[bsize];
  x->txfm_search_info.skip_txfm = 1;

  mbmi->ref_mv_idx = 0;

  mbmi->motion_mode = SIMPLE_TRANSLATION;
  av1_count_overlappable_neighbors(cm, xd);
  if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) {
    int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE];
    mbmi->num_proj_ref = av1_findSamples(cm, xd, pts, pts_inref);
    // Select the samples according to motion vector difference
    if (mbmi->num_proj_ref > 1) {
      mbmi->num_proj_ref = av1_selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref,
                                             mbmi->num_proj_ref, bsize);
    }
  }

  const InterpFilter interp_filter = features->interp_filter;
  set_default_interp_filters(mbmi, interp_filter);

  if (interp_filter != SWITCHABLE) {
    best_filter = interp_filter;
  } else {
    best_filter = EIGHTTAP_REGULAR;
    if (av1_is_interp_needed(xd)) {
      // Pick the switchable filter with the lowest signaling rate (the
      // residual is zero, so only the rate matters).
      int rs;
      int best_rs = INT_MAX;
      for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
        mbmi->interp_filters = av1_broadcast_interp_filter(i);
        rs = av1_get_switchable_rate(x, xd, interp_filter,
                                     cm->seq_params->enable_dual_filter);
        if (rs < best_rs) {
          best_rs = rs;
          best_filter = mbmi->interp_filters.as_filters.y_filter;
        }
      }
    }
  }
  // Set the appropriate filter
  mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
  rate2 += av1_get_switchable_rate(x, xd, interp_filter,
                                   cm->seq_params->enable_dual_filter);

  if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
    rate2 += comp_inter_cost[comp_pred];

  // Estimate the reference frame signaling cost and add it
  // to the rolling cost variable.
  rate2 += ref_costs_single[LAST_FRAME];
  this_rd = RDCOST(x->rdmult, rate2, distortion2);

  rd_cost->rate = rate2;
  rd_cost->dist = distortion2;
  rd_cost->rdcost = this_rd;

  if (this_rd >= best_rd_so_far) {
    rd_cost->rate = INT_MAX;
    rd_cost->rdcost = INT64_MAX;
    return;
  }

  assert((interp_filter == SWITCHABLE) ||
         (interp_filter == mbmi->interp_filters.as_filters.y_filter));

  if (cpi->sf.inter_sf.adaptive_rd_thresh) {
    av1_update_rd_thresh_fact(cm, x->thresh_freq_fact,
                              cpi->sf.inter_sf.adaptive_rd_thresh, bsize,
                              THR_GLOBALMV, THR_INTER_MODE_START,
                              THR_INTER_MODE_END, THR_DC, MAX_MODES);
  }

  av1_zero(best_pred_diff);

#if CONFIG_INTERNAL_STATS
  store_coding_context(x, ctx, THR_GLOBALMV, best_pred_diff, 0);
#else
  store_coding_context(x, ctx, best_pred_diff, 0);
#endif  // CONFIG_INTERNAL_STATS
}
5980 
/*!\cond */
// Context threaded through the overlappable-neighbor visitors
// (calc_target_weighted_pred_above/left) via their void *fun_ctxt argument.
struct calc_target_weighted_pred_ctxt {
  const OBMCBuffer *obmc_buffer;  // destination wsrc/mask buffers written to
  const uint8_t *tmp;             // source prediction buffer read and blended
  int tmp_stride;                 // row pitch of tmp
  int overlap;  // blending overlap in pixels (selects the OBMC mask)
};
/*!\endcond */
5989 
calc_target_weighted_pred_above(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * nb_mi,void * fun_ctxt,const int num_planes)5990 static INLINE void calc_target_weighted_pred_above(
5991     MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
5992     int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
5993   (void)nb_mi;
5994   (void)num_planes;
5995   (void)rel_mi_row;
5996   (void)dir;
5997 
5998   struct calc_target_weighted_pred_ctxt *ctxt =
5999       (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6000 
6001   const int bw = xd->width << MI_SIZE_LOG2;
6002   const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6003 
6004   int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_col * MI_SIZE);
6005   int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_col * MI_SIZE);
6006   const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE;
6007   const int is_hbd = is_cur_buf_hbd(xd);
6008 
6009   if (!is_hbd) {
6010     for (int row = 0; row < ctxt->overlap; ++row) {
6011       const uint8_t m0 = mask1d[row];
6012       const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6013       for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6014         wsrc[col] = m1 * tmp[col];
6015         mask[col] = m0;
6016       }
6017       wsrc += bw;
6018       mask += bw;
6019       tmp += ctxt->tmp_stride;
6020     }
6021   } else {
6022     const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6023 
6024     for (int row = 0; row < ctxt->overlap; ++row) {
6025       const uint8_t m0 = mask1d[row];
6026       const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6027       for (int col = 0; col < op_mi_size * MI_SIZE; ++col) {
6028         wsrc[col] = m1 * tmp16[col];
6029         mask[col] = m0;
6030       }
6031       wsrc += bw;
6032       mask += bw;
6033       tmp16 += ctxt->tmp_stride;
6034     }
6035   }
6036 }
6037 
calc_target_weighted_pred_left(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * nb_mi,void * fun_ctxt,const int num_planes)6038 static INLINE void calc_target_weighted_pred_left(
6039     MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
6040     int dir, MB_MODE_INFO *nb_mi, void *fun_ctxt, const int num_planes) {
6041   (void)nb_mi;
6042   (void)num_planes;
6043   (void)rel_mi_col;
6044   (void)dir;
6045 
6046   struct calc_target_weighted_pred_ctxt *ctxt =
6047       (struct calc_target_weighted_pred_ctxt *)fun_ctxt;
6048 
6049   const int bw = xd->width << MI_SIZE_LOG2;
6050   const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap);
6051 
6052   int32_t *wsrc = ctxt->obmc_buffer->wsrc + (rel_mi_row * MI_SIZE * bw);
6053   int32_t *mask = ctxt->obmc_buffer->mask + (rel_mi_row * MI_SIZE * bw);
6054   const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride);
6055   const int is_hbd = is_cur_buf_hbd(xd);
6056 
6057   if (!is_hbd) {
6058     for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6059       for (int col = 0; col < ctxt->overlap; ++col) {
6060         const uint8_t m0 = mask1d[col];
6061         const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6062         wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6063                     (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6064         mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6065       }
6066       wsrc += bw;
6067       mask += bw;
6068       tmp += ctxt->tmp_stride;
6069     }
6070   } else {
6071     const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp);
6072 
6073     for (int row = 0; row < op_mi_size * MI_SIZE; ++row) {
6074       for (int col = 0; col < ctxt->overlap; ++col) {
6075         const uint8_t m0 = mask1d[col];
6076         const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0;
6077         wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 +
6078                     (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1;
6079         mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0;
6080       }
6081       wsrc += bw;
6082       mask += bw;
6083       tmp16 += ctxt->tmp_stride;
6084     }
6085   }
6086 }
6087 
6088 // This function has a structure similar to av1_build_obmc_inter_prediction
6089 //
6090 // The OBMC predictor is computed as:
6091 //
6092 //  PObmc(x,y) =
6093 //    AOM_BLEND_A64(Mh(x),
6094 //                  AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)),
6095 //                  PLeft(x, y))
6096 //
6097 // Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate
6098 // rounding, this can be written as:
6099 //
6100 //  AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) =
6101 //    Mh(x) * Mv(y) * P(x,y) +
6102 //      Mh(x) * Cv(y) * Pabove(x,y) +
6103 //      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6104 //
6105 // Where :
6106 //
6107 //  Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y)
//  Ch(x) = AOM_BLEND_A64_MAX_ALPHA - Mh(x)
6109 //
6110 // This function computes 'wsrc' and 'mask' as:
6111 //
//  wsrc(x, y) =
//    AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) -
//      Mh(x) * Cv(y) * Pabove(x,y) -
//      AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y)
6116 //
6117 //  mask(x, y) = Mh(x) * Mv(y)
6118 //
6119 // These can then be used to efficiently approximate the error for any
6120 // predictor P in the context of the provided neighbouring predictors by
6121 // computing:
6122 //
6123 //  error(x, y) =
6124 //    wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2)
6125 //
calc_target_weighted_pred(const AV1_COMMON * cm,const MACROBLOCK * x,const MACROBLOCKD * xd,const uint8_t * above,int above_stride,const uint8_t * left,int left_stride)6126 static AOM_INLINE void calc_target_weighted_pred(
6127     const AV1_COMMON *cm, const MACROBLOCK *x, const MACROBLOCKD *xd,
6128     const uint8_t *above, int above_stride, const uint8_t *left,
6129     int left_stride) {
6130   const BLOCK_SIZE bsize = xd->mi[0]->bsize;
6131   const int bw = xd->width << MI_SIZE_LOG2;
6132   const int bh = xd->height << MI_SIZE_LOG2;
6133   const OBMCBuffer *obmc_buffer = &x->obmc_buffer;
6134   int32_t *mask_buf = obmc_buffer->mask;
6135   int32_t *wsrc_buf = obmc_buffer->wsrc;
6136 
6137   const int is_hbd = is_cur_buf_hbd(xd);
6138   const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA;
6139 
6140   // plane 0 should not be sub-sampled
6141   assert(xd->plane[0].subsampling_x == 0);
6142   assert(xd->plane[0].subsampling_y == 0);
6143 
6144   av1_zero_array(wsrc_buf, bw * bh);
6145   for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA;
6146 
6147   // handle above row
6148   if (xd->up_available) {
6149     const int overlap =
6150         AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
6151     struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, above,
6152                                                    above_stride, overlap };
6153     foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd,
6154                                   max_neighbor_obmc[mi_size_wide_log2[bsize]],
6155                                   calc_target_weighted_pred_above, &ctxt);
6156   }
6157 
6158   for (int i = 0; i < bw * bh; ++i) {
6159     wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6160     mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA;
6161   }
6162 
6163   // handle left column
6164   if (xd->left_available) {
6165     const int overlap =
6166         AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
6167     struct calc_target_weighted_pred_ctxt ctxt = { obmc_buffer, left,
6168                                                    left_stride, overlap };
6169     foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd,
6170                                  max_neighbor_obmc[mi_size_high_log2[bsize]],
6171                                  calc_target_weighted_pred_left, &ctxt);
6172   }
6173 
6174   if (!is_hbd) {
6175     const uint8_t *src = x->plane[0].src.buf;
6176 
6177     for (int row = 0; row < bh; ++row) {
6178       for (int col = 0; col < bw; ++col) {
6179         wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6180       }
6181       wsrc_buf += bw;
6182       src += x->plane[0].src.stride;
6183     }
6184   } else {
6185     const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf);
6186 
6187     for (int row = 0; row < bh; ++row) {
6188       for (int col = 0; col < bw; ++col) {
6189         wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col];
6190       }
6191       wsrc_buf += bw;
6192       src += x->plane[0].src.stride;
6193     }
6194   }
6195 }
6196 
/* Use standard 3x3 Sobel matrix. Macro so it can be used for either high or
   low bit-depth arrays. */
/* NOTE: arguments are evaluated multiple times, so pass only side-effect-free
   expressions; (i, j) must be at least one pixel inside the borders of
   'src'. */
#define SOBEL_X(src, stride, i, j)                       \
  ((src)[((i)-1) + (stride) * ((j)-1)] -                 \
   (src)[((i) + 1) + (stride) * ((j)-1)] +  /* NOLINT */ \
   2 * (src)[((i)-1) + (stride) * (j)] -    /* NOLINT */ \
   2 * (src)[((i) + 1) + (stride) * (j)] +  /* NOLINT */ \
   (src)[((i)-1) + (stride) * ((j) + 1)] -  /* NOLINT */ \
   (src)[((i) + 1) + (stride) * ((j) + 1)]) /* NOLINT */
/* Vertical counterpart of SOBEL_X: gradient along the y axis. */
#define SOBEL_Y(src, stride, i, j)                       \
  ((src)[((i)-1) + (stride) * ((j)-1)] +                 \
   2 * (src)[(i) + (stride) * ((j)-1)] +    /* NOLINT */ \
   (src)[((i) + 1) + (stride) * ((j)-1)] -  /* NOLINT */ \
   (src)[((i)-1) + (stride) * ((j) + 1)] -  /* NOLINT */ \
   2 * (src)[(i) + (stride) * ((j) + 1)] -  /* NOLINT */ \
   (src)[((i) + 1) + (stride) * ((j) + 1)]) /* NOLINT */
6213 
av1_sobel(const uint8_t * input,int stride,int i,int j,bool high_bd)6214 sobel_xy av1_sobel(const uint8_t *input, int stride, int i, int j,
6215                    bool high_bd) {
6216   int16_t s_x;
6217   int16_t s_y;
6218   if (high_bd) {
6219     const uint16_t *src = CONVERT_TO_SHORTPTR(input);
6220     s_x = SOBEL_X(src, stride, i, j);
6221     s_y = SOBEL_Y(src, stride, i, j);
6222   } else {
6223     s_x = SOBEL_X(input, stride, i, j);
6224     s_y = SOBEL_Y(input, stride, i, j);
6225   }
6226   sobel_xy r = { .x = s_x, .y = s_y };
6227   return r;
6228 }
6229 
// 8-tap Gaussian convolution filter with sigma = 1.3, sums to 128,
// all coefficients must be even.  Used by av1_gaussian_blur() below.
DECLARE_ALIGNED(16, static const int16_t, gauss_filter[8]) = { 2,  12, 30, 40,
                                                               30, 12, 2,  0 };
6234 
av1_gaussian_blur(const uint8_t * src,int src_stride,int w,int h,uint8_t * dst,bool high_bd,int bd)6235 void av1_gaussian_blur(const uint8_t *src, int src_stride, int w, int h,
6236                        uint8_t *dst, bool high_bd, int bd) {
6237   ConvolveParams conv_params = get_conv_params(0, 0, bd);
6238   InterpFilterParams filter = { .filter_ptr = gauss_filter,
6239                                 .taps = 8,
6240                                 .interp_filter = EIGHTTAP_REGULAR };
6241   // Requirements from the vector-optimized implementations.
6242   assert(h % 4 == 0);
6243   assert(w % 8 == 0);
6244   // Because we use an eight tap filter, the stride should be at least 7 + w.
6245   assert(src_stride >= w + 7);
6246 #if CONFIG_AV1_HIGHBITDEPTH
6247   if (high_bd) {
6248     av1_highbd_convolve_2d_sr(CONVERT_TO_SHORTPTR(src), src_stride,
6249                               CONVERT_TO_SHORTPTR(dst), w, w, h, &filter,
6250                               &filter, 0, 0, &conv_params, bd);
6251   } else {
6252     av1_convolve_2d_sr(src, src_stride, dst, w, w, h, &filter, &filter, 0, 0,
6253                        &conv_params);
6254   }
6255 #else
6256   (void)high_bd;
6257   av1_convolve_2d_sr(src, src_stride, dst, w, w, h, &filter, &filter, 0, 0,
6258                      &conv_params);
6259 #endif
6260 }
6261 
edge_probability(const uint8_t * input,int w,int h,bool high_bd,int bd)6262 static EdgeInfo edge_probability(const uint8_t *input, int w, int h,
6263                                  bool high_bd, int bd) {
6264   // The probability of an edge in the whole image is the same as the highest
6265   // probability of an edge for any individual pixel. Use Sobel as the metric
6266   // for finding an edge.
6267   uint16_t highest = 0;
6268   uint16_t highest_x = 0;
6269   uint16_t highest_y = 0;
6270   // Ignore the 1 pixel border around the image for the computation.
6271   for (int j = 1; j < h - 1; ++j) {
6272     for (int i = 1; i < w - 1; ++i) {
6273       sobel_xy g = av1_sobel(input, w, i, j, high_bd);
6274       // Scale down to 8-bit to get same output regardless of bit depth.
6275       int16_t g_x = g.x >> (bd - 8);
6276       int16_t g_y = g.y >> (bd - 8);
6277       uint16_t magnitude = (uint16_t)sqrt(g_x * g_x + g_y * g_y);
6278       highest = AOMMAX(highest, magnitude);
6279       highest_x = AOMMAX(highest_x, g_x);
6280       highest_y = AOMMAX(highest_y, g_y);
6281     }
6282   }
6283   EdgeInfo ei = { .magnitude = highest, .x = highest_x, .y = highest_y };
6284   return ei;
6285 }
6286 
6287 /* Uses most of the Canny edge detection algorithm to find if there are any
6288  * edges in the image.
6289  */
av1_edge_exists(const uint8_t * src,int src_stride,int w,int h,bool high_bd,int bd)6290 EdgeInfo av1_edge_exists(const uint8_t *src, int src_stride, int w, int h,
6291                          bool high_bd, int bd) {
6292   if (w < 3 || h < 3) {
6293     EdgeInfo n = { .magnitude = 0, .x = 0, .y = 0 };
6294     return n;
6295   }
6296   uint8_t *blurred;
6297   if (high_bd) {
6298     blurred = CONVERT_TO_BYTEPTR(aom_memalign(32, sizeof(uint16_t) * w * h));
6299   } else {
6300     blurred = (uint8_t *)aom_memalign(32, sizeof(uint8_t) * w * h);
6301   }
6302   av1_gaussian_blur(src, src_stride, w, h, blurred, high_bd, bd);
6303   // Skip the non-maximum suppression step in Canny edge detection. We just
6304   // want a probability of an edge existing in the buffer, which is determined
6305   // by the strongest edge in it -- we don't need to eliminate the weaker
6306   // edges. Use Sobel for the edge detection.
6307   EdgeInfo prob = edge_probability(blurred, w, h, high_bd, bd);
6308   if (high_bd) {
6309     aom_free(CONVERT_TO_SHORTPTR(blurred));
6310   } else {
6311     aom_free(blurred);
6312   }
6313   return prob;
6314 }
6315